mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	use licensee to classify submodule licenses
This commit is contained in:
		@@ -24,4 +24,6 @@ Gem::Specification.new do |s|
 | 
				
			|||||||
  s.add_development_dependency 'rake'
 | 
					  s.add_development_dependency 'rake'
 | 
				
			||||||
  s.add_development_dependency 'yajl-ruby'
 | 
					  s.add_development_dependency 'yajl-ruby'
 | 
				
			||||||
  s.add_development_dependency 'color-proximity', '~> 0.2.1'
 | 
					  s.add_development_dependency 'color-proximity', '~> 0.2.1'
 | 
				
			||||||
 | 
					  s.add_development_dependency 'licensee', '~> 4.7.3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
end
 | 
					end
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,6 +3,7 @@ require "minitest/autorun"
 | 
				
			|||||||
require "mocha/setup"
 | 
					require "mocha/setup"
 | 
				
			||||||
require "linguist"
 | 
					require "linguist"
 | 
				
			||||||
require 'color-proximity'
 | 
					require 'color-proximity'
 | 
				
			||||||
 | 
					require 'licensee'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def fixtures_path
 | 
					def fixtures_path
 | 
				
			||||||
  File.expand_path("../fixtures", __FILE__)
 | 
					  File.expand_path("../fixtures", __FILE__)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,12 +3,13 @@ require_relative "./helper"
 | 
				
			|||||||
class TestGrammars < Minitest::Test
 | 
					class TestGrammars < Minitest::Test
 | 
				
			||||||
  ROOT = File.expand_path("../..", __FILE__)
 | 
					  ROOT = File.expand_path("../..", __FILE__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LICENSE_WHITELIST = [
 | 
					  PROJECT_WHITELIST = [
 | 
				
			||||||
    # This grammar's MIT license is inside a subdirectory.
 | 
					    # This grammar's MIT license is inside a subdirectory.
 | 
				
			||||||
    "vendor/grammars/SublimePapyrus",
 | 
					    "vendor/grammars/SublimePapyrus",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # This grammar has a nonstandard but acceptable license.
 | 
					    # This grammar has a nonstandard but acceptable license.
 | 
				
			||||||
    "vendor/grammars/gap-tmbundle",
 | 
					    "vendor/grammars/gap-tmbundle",
 | 
				
			||||||
 | 
					    "vendor/grammars/factor",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # These grammars have no license but have been grandfathered in. New grammars
 | 
					    # These grammars have no license but have been grandfathered in. New grammars
 | 
				
			||||||
    # must have a license that allows redistribution.
 | 
					    # must have a license that allows redistribution.
 | 
				
			||||||
@@ -16,6 +17,22 @@ class TestGrammars < Minitest::Test
 | 
				
			|||||||
    "vendor/grammars/x86-assembly-textmate-bundle"
 | 
					    "vendor/grammars/x86-assembly-textmate-bundle"
 | 
				
			||||||
  ].freeze
 | 
					  ].freeze
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # List of allowed SPDX license names
 | 
				
			||||||
 | 
					  LICENSE_WHITELIST = %w[
 | 
				
			||||||
 | 
					    apache-2.0
 | 
				
			||||||
 | 
					    bsd-2-clause
 | 
				
			||||||
 | 
					    bsd-3-clause
 | 
				
			||||||
 | 
					    cc-by-sa-3.0
 | 
				
			||||||
 | 
					    gpl-2.0
 | 
				
			||||||
 | 
					    gpl-3.0
 | 
				
			||||||
 | 
					    lgpl-3.0
 | 
				
			||||||
 | 
					    mit
 | 
				
			||||||
 | 
					    textmate
 | 
				
			||||||
 | 
					    unlicense
 | 
				
			||||||
 | 
					    wtfpl
 | 
				
			||||||
 | 
					    zlib
 | 
				
			||||||
 | 
					  ].freeze
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def setup
 | 
					  def setup
 | 
				
			||||||
    @grammars = YAML.load(File.read(File.join(ROOT, "grammars.yml")))
 | 
					    @grammars = YAML.load(File.read(File.join(ROOT, "grammars.yml")))
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
@@ -63,47 +80,35 @@ class TestGrammars < Minitest::Test
 | 
				
			|||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_submodules_have_licenses
 | 
					  def test_submodules_have_licenses
 | 
				
			||||||
    categories = submodule_paths.group_by do |submodule|
 | 
					    submodule_licenses.each do |submodule, license|
 | 
				
			||||||
      files = Dir[File.join(ROOT, submodule, "*")]
 | 
					      next if PROJECT_WHITELIST.include?(submodule)
 | 
				
			||||||
      license = files.find { |path| File.basename(path) =~ /\b(un)?licen[cs]e\b/i } || files.find { |path| File.basename(path) =~ /\bcopying\b/i }
 | 
					
 | 
				
			||||||
      if license.nil?
 | 
					      license_file = Licensee::Project.new(submodule).license_file
 | 
				
			||||||
        if readme = files.find { |path| File.basename(path) =~ /\Areadme\b/i }
 | 
					      submodule_name = submodule.split("/").last
 | 
				
			||||||
          license = readme if File.read(readme) =~ /\blicen[cs]e\b/i
 | 
					
 | 
				
			||||||
        end
 | 
					      if license_file
 | 
				
			||||||
      end
 | 
					        assert license, "Submodule #{submodule_name} contains an unrecognized license. Please update #{__FILE__} to recognize the license"
 | 
				
			||||||
      if license.nil?
 | 
					 | 
				
			||||||
        :unlicensed
 | 
					 | 
				
			||||||
      elsif classify_license(license)
 | 
					 | 
				
			||||||
        :licensed
 | 
					 | 
				
			||||||
      else
 | 
					      else
 | 
				
			||||||
        :unrecognized
 | 
					        assert license, "Submodule #{submodule_name} is unlicensed. All grammars must have a license that permits redistribution"
 | 
				
			||||||
      end
 | 
					 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    unlicensed = categories[:unlicensed] || []
 | 
					      assert LICENSE_WHITELIST.include?(license), "Submodule #{submodule_name} is licensed under the #{license} license"
 | 
				
			||||||
    unrecognized = categories[:unrecognized] || []
 | 
					 | 
				
			||||||
    disallowed_unlicensed = unlicensed - LICENSE_WHITELIST
 | 
					 | 
				
			||||||
    disallowed_unrecognized = unrecognized - LICENSE_WHITELIST
 | 
					 | 
				
			||||||
    extra_whitelist_entries = LICENSE_WHITELIST - (unlicensed | unrecognized)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    message = ""
 | 
					 | 
				
			||||||
    if disallowed_unlicensed.any?
 | 
					 | 
				
			||||||
      message << "The following grammar submodules don't seem to have a license. All grammars must have a license that permits redistribution.\n"
 | 
					 | 
				
			||||||
      message << disallowed_unlicensed.sort.join("\n")
 | 
					 | 
				
			||||||
    end
 | 
					 | 
				
			||||||
    if disallowed_unrecognized.any?
 | 
					 | 
				
			||||||
      message << "\n\n" unless message.empty?
 | 
					 | 
				
			||||||
      message << "The following grammar submodules have an unrecognized license. Please update #{__FILE__} to recognize the license.\n"
 | 
					 | 
				
			||||||
      message << disallowed_unrecognized.sort.join("\n")
 | 
					 | 
				
			||||||
    end
 | 
					 | 
				
			||||||
    if extra_whitelist_entries.any?
 | 
					 | 
				
			||||||
      message << "\n\n" unless message.empty?
 | 
					 | 
				
			||||||
      message << "The following grammar submodules are listed in LICENSE_WHITELIST but either have a license (yay!)\n"
 | 
					 | 
				
			||||||
      message << "or have been removed from the repository. Please remove them from the whitelist.\n"
 | 
					 | 
				
			||||||
      message << extra_whitelist_entries.sort.join("\n")
 | 
					 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert disallowed_unlicensed.empty? && disallowed_unrecognized.empty? && extra_whitelist_entries.empty?, message
 | 
					    unlicensed   = submodule_licenses.select { |k,v| v.nil? }
 | 
				
			||||||
 | 
					    unrecognized = submodule_licenses.select { |k,v| v.nil? && Licensee::Project.new(k).license_file}
 | 
				
			||||||
 | 
					    licensed     = submodule_licenses.reject { |k,v| v.nil? }
 | 
				
			||||||
 | 
					    unapproved   = licensed.reject { |k,v| LICENSE_WHITELIST.include?(v) }
 | 
				
			||||||
 | 
					    extra_whitelist_entries = PROJECT_WHITELIST - unlicensed.keys - unrecognized.keys
 | 
				
			||||||
 | 
					    assert unapproved.empty?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_whitelist_entries.each do |submodule|
 | 
				
			||||||
 | 
					      license = submodule_licenses[submodule]
 | 
				
			||||||
 | 
					      submodule_name = submodule.split("/").last
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      assert_equal nil, license, "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but has a license"
 | 
				
			||||||
 | 
					      assert Dir.exists?(submodule), "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but doesn't appear to be part of the project"
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private
 | 
					  private
 | 
				
			||||||
@@ -112,30 +117,61 @@ class TestGrammars < Minitest::Test
 | 
				
			|||||||
    @submodule_paths ||= `git config --list --file "#{File.join(ROOT, ".gitmodules")}"`.lines.grep(/\.path=/).map { |line| line.chomp.split("=", 2).last }
 | 
					    @submodule_paths ||= `git config --list --file "#{File.join(ROOT, ".gitmodules")}"`.lines.grep(/\.path=/).map { |line| line.chomp.split("=", 2).last }
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Returns a hash of submodules in the form of submodule_path => license
 | 
				
			||||||
 | 
					  def submodule_licenses
 | 
				
			||||||
 | 
					    @submodule_licenses = begin
 | 
				
			||||||
 | 
					      submodules = {}
 | 
				
			||||||
 | 
					      submodule_paths.each { |submodule| submodules[submodule] = submodule_license(submodule) }
 | 
				
			||||||
 | 
					      submodules
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Given the path to a submodule, return its SPDX-compliant license key
 | 
				
			||||||
 | 
					  def submodule_license(submodule)
 | 
				
			||||||
 | 
					    # Prefer Licensee to detect a submodule's license
 | 
				
			||||||
 | 
					    project = Licensee::Project.new(submodule)
 | 
				
			||||||
 | 
					    return project.license.key if project.license
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # We know a license file exists, but Licensee wasn't able to detect the license,
 | 
				
			||||||
 | 
					    # Let's try our own more permissive regex method
 | 
				
			||||||
 | 
					    if project.license_file
 | 
				
			||||||
 | 
					      path = File.expand_path project.license_file.path, submodule
 | 
				
			||||||
 | 
					      license = classify_license(path)
 | 
				
			||||||
 | 
					      return license if license
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Neither Licensee nor our own regex was able to detect the license, lets check the readme
 | 
				
			||||||
 | 
					    files = Dir[File.join(ROOT, submodule, "*")]
 | 
				
			||||||
 | 
					    if readme = files.find { |path| File.basename(path) =~ /\Areadme\b/i }
 | 
				
			||||||
 | 
					      classify_license(readme)
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def classify_license(path)
 | 
					  def classify_license(path)
 | 
				
			||||||
    content = File.read(path)
 | 
					    content = File.read(path)
 | 
				
			||||||
 | 
					    return unless content =~ /\blicen[cs]e\b/i
 | 
				
			||||||
    if content.include?("Apache License") && content.include?("2.0")
 | 
					    if content.include?("Apache License") && content.include?("2.0")
 | 
				
			||||||
      "Apache 2.0"
 | 
					      "apache-2.0"
 | 
				
			||||||
    elsif content.include?("GNU") && content =~ /general/i && content =~ /public/i
 | 
					    elsif content.include?("GNU") && content =~ /general/i && content =~ /public/i
 | 
				
			||||||
      if content =~ /version 2/i
 | 
					      if content =~ /version 2/i
 | 
				
			||||||
        "GPLv2"
 | 
					        "gpl-2.0"
 | 
				
			||||||
      elsif content =~ /version 3/i
 | 
					      elsif content =~ /version 3/i
 | 
				
			||||||
        "GPLv3"
 | 
					        "gpl-3.0"
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
    elsif content.include?("GPL") && content.include?("http://www.gnu.org/licenses/gpl.html")
 | 
					    elsif content.include?("GPL") && content.include?("http://www.gnu.org/licenses/gpl.html")
 | 
				
			||||||
      "GPLv3"
 | 
					      "gpl-3.0"
 | 
				
			||||||
    elsif content.include?("Creative Commons")
 | 
					    elsif content.include?("Creative Commons Attribution-Share Alike 3.0")
 | 
				
			||||||
      "CC"
 | 
					      "cc-by-sa-3.0"
 | 
				
			||||||
    elsif content.include?("tidy-license.txt") || content.include?("If not otherwise specified (see below)")
 | 
					    elsif content.include?("tidy-license.txt") || content.include?("If not otherwise specified (see below)")
 | 
				
			||||||
      "textmate"
 | 
					      "textmate"
 | 
				
			||||||
    elsif content =~ /^\s*[*-]\s+Redistribution/ || content.include?("Redistributions of source code")
 | 
					    elsif content =~ /^\s*[*-]\s+Redistribution/ || content.include?("Redistributions of source code")
 | 
				
			||||||
      "BSD"
 | 
					      "bsd"
 | 
				
			||||||
    elsif content.include?("Permission is hereby granted") || content =~ /\bMIT\b/
 | 
					    elsif content.include?("Permission is hereby granted") || content =~ /\bMIT\b/
 | 
				
			||||||
      "MIT"
 | 
					      "mit"
 | 
				
			||||||
    elsif content.include?("unlicense.org")
 | 
					    elsif content.include?("unlicense.org")
 | 
				
			||||||
      "unlicense"
 | 
					      "unlicense"
 | 
				
			||||||
    elsif content.include?("http://www.wtfpl.net/txt/copying/")
 | 
					    elsif content.include?("http://www.wtfpl.net/txt/copying/")
 | 
				
			||||||
      "WTFPL"
 | 
					      "wtfpl"
 | 
				
			||||||
    elsif content.include?("zlib") && content.include?("license") && content.include?("2. Altered source versions must be plainly marked as such")
 | 
					    elsif content.include?("zlib") && content.include?("license") && content.include?("2. Altered source versions must be plainly marked as such")
 | 
				
			||||||
      "zlib"
 | 
					      "zlib"
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user