mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Generate language_id (#3284)
* Generate language_id from language names. The language_id is generated from the SHA256 hash of the language's name.
* Test the validity of language ids. All languages should have a positive 32-bit integer as an id.
* Update languages.yml header in set-language-ids.
This commit is contained in:
committed by
Brandon Black
parent
d46a529b6a
commit
0980e304b1
@@ -11,6 +11,8 @@ header = <<-EOF
|
|||||||
# ace_mode - A String name of the Ace Mode used for highlighting whenever
|
# ace_mode - A String name of the Ace Mode used for highlighting whenever
|
||||||
# a file is edited. This must match one of the filenames in http://git.io/3XO_Cg.
|
# a file is edited. This must match one of the filenames in http://git.io/3XO_Cg.
|
||||||
# Use "text" if a mode does not exist.
|
# Use "text" if a mode does not exist.
|
||||||
|
# codemirror_mode - A String name of the CodeMirror Mode used for highlighting whenever a file is edited.
|
||||||
|
# This must match a mode from https://git.io/vi9Fx
|
||||||
# wrap - Boolean wrap to enable line wrapping (default: false)
|
# wrap - Boolean wrap to enable line wrapping (default: false)
|
||||||
# extensions - An Array of associated extensions (the first one is
|
# extensions - An Array of associated extensions (the first one is
|
||||||
# considered the primary extension, the others should be
|
# considered the primary extension, the others should be
|
||||||
@@ -20,9 +22,9 @@ header = <<-EOF
|
|||||||
# search_term - Deprecated: Some languages may be indexed under a
|
# search_term - Deprecated: Some languages may be indexed under a
|
||||||
# different alias. Avoid defining new exceptions.
|
# different alias. Avoid defining new exceptions.
|
||||||
# language_id - Integer used as a language-name-independent indexed field so that we can rename
|
# language_id - Integer used as a language-name-independent indexed field so that we can rename
|
||||||
# languages in Linguist without reindexing all the code on GitHub. Must not be
|
# languages in Linguist without reindexing all the code on GitHub. Must not be
|
||||||
# changed for existing languages without the explicit permission of GitHub staff.
|
# changed for existing languages without the explicit permission of GitHub staff.
|
||||||
# color - CSS hex color to represent the language.
|
# color - CSS hex color to represent the language. Only used if type is "programming" or "prose".
|
||||||
# tm_scope - The TextMate scope that represents this programming
|
# tm_scope - The TextMate scope that represents this programming
|
||||||
# language. This should match one of the scopes listed in
|
# language. This should match one of the scopes listed in
|
||||||
# the grammars.yml file. Use "none" if there is no grammar
|
# the grammars.yml file. Use "none" if there is no grammar
|
||||||
@@ -36,21 +38,23 @@ header = <<-EOF
|
|||||||
# Please keep this list alphabetized. Capitalization comes before lowercase.
|
# Please keep this list alphabetized. Capitalization comes before lowercase.
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
require 'digest'
|
||||||
|
|
||||||
generated = true if ARGV[0] == "--force"
|
generated = true if ARGV[0] == "--force"
|
||||||
update = true if ARGV[0] == "--update"
|
update = true if ARGV[0] == "--update"
|
||||||
|
|
||||||
|
# Exclusive upper bound for generated ids: results fall in 0...(2**30 - 1).
LANGUAGE_ID_MODULUS = 2**30 - 1

# Derives a deterministic integer id from a language name.
#
# The name is hashed with SHA256, the hex digest is read as one large
# integer, and that integer is reduced modulo `modulus`. The same name
# therefore always yields the same id.
#
# language - String name of the language.
# modulus  - Integer the hash is reduced by (default: LANGUAGE_ID_MODULUS).
#            Changing it changes every generated id.
#
# Returns an Integer in 0...modulus.
def generate_language_id(language, modulus: LANGUAGE_ID_MODULUS)
  Digest::SHA256.hexdigest(language).to_i(16) % modulus
end
|
||||||
|
|
||||||
if generated
|
if generated
|
||||||
puts "You're regenerating all of the language_id attributes for all Linguist "
|
puts "You're regenerating all of the language_id attributes for all Linguist "
|
||||||
puts "languages defined in languages.yml. This is almost certainly NOT what"
|
puts "languages defined in languages.yml. This is almost certainly NOT what"
|
||||||
puts "you meant to do!"
|
puts "you meant to do!"
|
||||||
|
|
||||||
language_index = 0
|
|
||||||
|
|
||||||
languages = YAML.load(File.read("lib/linguist/languages.yml"))
|
languages = YAML.load(File.read("lib/linguist/languages.yml"))
|
||||||
languages.each do |name, vals|
|
languages.each do |name, vals|
|
||||||
vals.merge!('language_id' => language_index)
|
vals.merge!('language_id' => generate_language_id(name))
|
||||||
language_index += 1
|
|
||||||
end
|
end
|
||||||
|
|
||||||
File.write("lib/linguist/languages.yml", header + YAML.dump(languages))
|
File.write("lib/linguist/languages.yml", header + YAML.dump(languages))
|
||||||
@@ -58,20 +62,12 @@ elsif update
|
|||||||
puts "Adding new language_id attributes to languages.yml that don't have one set"
|
puts "Adding new language_id attributes to languages.yml that don't have one set"
|
||||||
languages = YAML.load(File.read("lib/linguist/languages.yml"))
|
languages = YAML.load(File.read("lib/linguist/languages.yml"))
|
||||||
|
|
||||||
# First grab the maximum language_id
|
|
||||||
language_ids = []
|
|
||||||
languages.each { |name, vals| language_ids << vals['language_id'] if vals.has_key?('language_id')}
|
|
||||||
max_language_id = language_ids.max
|
|
||||||
puts "Current maximum language_id is #{max_language_id}"
|
|
||||||
|
|
||||||
missing_count = 0
|
missing_count = 0
|
||||||
language_index = max_language_id
|
|
||||||
|
|
||||||
languages.each do |name, vals|
|
languages.each do |name, vals|
|
||||||
unless vals.has_key?('language_id')
|
unless vals.has_key?('language_id')
|
||||||
language_index += 1
|
|
||||||
missing_count += 1
|
missing_count += 1
|
||||||
vals.merge!('language_id' => language_index)
|
vals.merge!('language_id' => generate_language_id(name))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -427,6 +427,14 @@ class TestLanguage < Minitest::Test
|
|||||||
assert missing.empty?, message
|
assert missing.empty?, message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_all_languages_have_a_valid_id
|
||||||
|
invalid = Language.all.select { |language| language.language_id < 0 || language.language_id >= (2**31 - 1) }
|
||||||
|
|
||||||
|
message = "The following languages do not have a valid language_id. Please use script/set-language-ids --update as per the contribution guidelines.\n"
|
||||||
|
invalid.each { |language| message << "#{language.name}\n" }
|
||||||
|
assert invalid.empty?, message
|
||||||
|
end
|
||||||
|
|
||||||
def test_all_language_id_are_unique
|
def test_all_language_id_are_unique
|
||||||
duplicates = Language.all.group_by{ |language| language.language_id }.select { |k, v| v.size > 1 }.map(&:first)
|
duplicates = Language.all.group_by{ |language| language.language_id }.select { |k, v| v.size > 1 }.map(&:first)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user