diff --git a/script/set-language-ids b/script/set-language-ids index ecbe1e76..6b141ab4 100755 --- a/script/set-language-ids +++ b/script/set-language-ids @@ -11,6 +11,8 @@ header = <<-EOF # ace_mode - A String name of the Ace Mode used for highlighting whenever # a file is edited. This must match one of the filenames in http://git.io/3XO_Cg. # Use "text" if a mode does not exist. +# codemirror_mode - A String name of the CodeMirror Mode used for highlighting whenever a file is edited. +# This must match a mode from https://git.io/vi9Fx # wrap - Boolean wrap to enable line wrapping (default: false) # extensions - An Array of associated extensions (the first one is # considered the primary extension, the others should be @@ -20,9 +22,9 @@ header = <<-EOF # search_term - Deprecated: Some languages may be indexed under a # different alias. Avoid defining new exceptions. # language_id - Integer used as a language-name-independent indexed field so that we can rename -# languages in Linguist without reindexing all the code on GitHub. Must not be +# languages in Linguist without reindexing all the code on GitHub. Must not be # changed for existing languages without the explicit permission of GitHub staff. -# color - CSS hex color to represent the language. +# color - CSS hex color to represent the language. Only used if type is "programming" or "prose". # tm_scope - The TextMate scope that represents this programming # language. This should match one of the scopes listed in # the grammars.yml file. Use "none" if there is no grammar @@ -36,21 +38,23 @@ header = <<-EOF # Please keep this list alphabetized. Capitalization comes before lowercase. EOF +require 'digest' generated = true if ARGV[0] == "--force" update = true if ARGV[0] == "--update" +def generate_language_id(language) + Digest::SHA256.hexdigest(language).to_i(16) % (2**30 - 1) +end + if generated puts "You're regenerating all of the language_id attributes for all Linguist " puts "languages defined in languages.yml. This is almost certainly NOT what" puts "you meant to do!" - language_index = 0 - languages = YAML.load(File.read("lib/linguist/languages.yml")) languages.each do |name, vals| - vals.merge!('language_id' => language_index) - language_index += 1 + vals.merge!('language_id' => generate_language_id(name)) end File.write("lib/linguist/languages.yml", header + YAML.dump(languages)) @@ -58,20 +62,12 @@ elsif update puts "Adding new language_id attributes to languages.yml that don't have one set" languages = YAML.load(File.read("lib/linguist/languages.yml")) - # First grab the maximum language_id - language_ids = [] - languages.each { |name, vals| language_ids << vals['language_id'] if vals.has_key?('language_id')} - max_language_id = language_ids.max - puts "Current maximum language_id is #{max_language_id}" - missing_count = 0 - language_index = max_language_id languages.each do |name, vals| unless vals.has_key?('language_id') - language_index += 1 missing_count += 1 - vals.merge!('language_id' => language_index) + vals.merge!('language_id' => generate_language_id(name)) end end diff --git a/test/test_language.rb b/test/test_language.rb index 7dc7dc5d..a82040e2 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -427,6 +427,14 @@ class TestLanguage < Minitest::Test assert missing.empty?, message end + def test_all_languages_have_a_valid_id + invalid = Language.all.select { |language| language.language_id < 0 || language.language_id >= (2**31 - 1) } + + message = "The following languages do not have a valid language_id. Please use script/set-language-ids --update as per the contribution guidelines.\n" + invalid.each { |language| message << "#{language.name}\n" } + assert invalid.empty?, message + end + def test_all_language_id_are_unique duplicates = Language.all.group_by{ |language| language.language_id }.select { |k, v| v.size > 1 }.map(&:first)