diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 5fab5686..ac6b7f8a 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -369,7 +369,7 @@ module Linguist if lang = Shebangs[script] lang else - lang = Language.find_by_lexer(script) + lang = Language[script] lang != Language['Text'] ? lang : nil end end diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 8b52abb6..e0d7ff61 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -6,6 +6,7 @@ module Linguist class Language @languages = [] @name_index = {} + @alias_index = {} @lexer_index = {} @extension_index = {} @@ -27,6 +28,16 @@ module Linguist # Case-insensitive language name index @name_index[language.name.downcase] = language + language.aliases.each do |name| + # All Language aliases should be unique. Warn if there is a duplicate. + if @alias_index.key?(name) + warn "Duplicate alias: #{name}" + end + + @alias_index[name] = language + end + + # Set langauge as the default for reverse lexer lookup if # :default_lexer is set or the language is the same name as its # lexer. @@ -75,6 +86,20 @@ module Linguist @name_index[name.downcase] end + # Public: Look up Language by one of its aliases. + # + # name - A case-insensitive String alias of the Language + # + # Examples + # + # Language.find_by_alias('cpp') + # # => # + # + # Returns the Language or nil if none was found. + def self.find_by_alias(name) + @alias_index[name.downcase] + end + # Public: Look up Language by extension. # # extension - The extension String. May include leading "." @@ -89,23 +114,6 @@ module Linguist @extension_index[extension] end - # Deprecated: Look up Language by its lexer. - # - # The use of this method is discouraged since multiple languages - # may have the same lexer name. 
- # - # name - The case-insensitive String lexer of the Language - # - # Examples - # - # Language.find_by_lexer('cpp') - # # => # - # - # Returns the Language or Language['Text'] if none was found. - def self.find_by_lexer(lexer) - @lexer_index[lexer.downcase] || self['Text'] - end - # Public: Look up Language by its name or lexer. # # name - The case-insensitive String name of the Language @@ -120,7 +128,7 @@ module Linguist # # Returns the Language or nil if none was found. def self.[](name) - find_by_name(name) || find_by_lexer(name) + find_by_name(name) || find_by_alias(name) || self['Text'] end # Public: A List of popular languages @@ -154,8 +162,11 @@ module Linguist # @name is required @name = attributes[:name] || raise(ArgumentError, "missing name") + # Set aliases + @aliases = [default_alias_name] + (attributes[:aliases] || []) + # Use :lexer_name or fallback to `@name.downcase` - @lexer_name = attributes[:lexer_name] || default_lexer_name + @lexer_name = attributes[:lexer_name] || default_alias_name # Lookup Lexer object @lexer = Lexer.find_by_alias(@lexer_name) @@ -180,6 +191,16 @@ module Linguist # Returns the name String attr_reader :name + # Public: Get aliases + # + # Examples + # + # Language['C++'].aliases + # # => ["cpp"] + # + # Returns an Array of String names + attr_reader :aliases + # Deprecated: Get lexer name # # Examples @@ -205,10 +226,10 @@ module Linguist # Returns the extensions Array attr_reader :extensions - # Internal: Get default lexer name + # Internal: Get default alias name # - # Returns the lexer name String - def default_lexer_name + # Returns the alias name String + def default_alias_name name.downcase.gsub(/\s/, '-') end @@ -216,7 +237,7 @@ module Linguist # # Returns true or false def default_lexer? 
- lexer_name == default_lexer_name + lexer_name == default_alias_name end def search_term @@ -286,12 +307,13 @@ module Linguist YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| Language.create( - :name => name, + :name => name, + :aliases => options[:aliases], :lexer_name => options[:lexer], :default_lexer => options[:default_lexer], :extensions => options[:ext], - :popular => popular.include?(name), - :common => common.include?(name) + :popular => popular.include?(name), + :common => common.include?(name) ) end end diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 35491383..9615ffbb 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -1,10 +1,14 @@ # Defines all Lanaguges known to GitHub # -# lexer - An explicit lexer String (defaults to name.downcase) -# ext - An Array of associated extensions +# lexer - An explicit lexer String (defaults to name.downcase) +# aliases - An Array of additional aliases (implicitly includes name.downcase) +# ext - An Array of associated extensions ASP: :lexer: aspx-vb + :aliases: + - aspx + - aspx-vb :ext: - .ascx - .axd @@ -15,6 +19,8 @@ ASP: - .asp ActionScript: :lexer: as3 + :aliases: + - as3 :ext: - .as Ada: @@ -29,10 +35,14 @@ Arc: - .arc Assembly: :lexer: nasm + :aliases: + - nasm :ext: - .asm Batchfile: :lexer: bat + :aliases: + - bat :ext: - .bat - .cmd @@ -55,10 +65,14 @@ C: - .h C#: :lexer: csharp + :aliases: + - csharp :ext: - .cs C++: :lexer: cpp + :aliases: + - cpp :ext: - .cpp - .hh @@ -85,10 +99,14 @@ CoffeeScript: - Cakefile ColdFusion: :lexer: cfm + :aliases: + - cfm :ext: - .cfm - .cfc Common Lisp: + :aliases: + - lisp :ext: - .cl - .lisp @@ -107,6 +125,8 @@ D: - .di Darcs Patch: :lexer: dpatch + :aliases: + - dpatch :ext: - .darcspatch - .dpatch @@ -128,6 +148,8 @@ Eiffel: - .e Emacs Lisp: :lexer: scheme + :aliases: + - elisp :ext: - .el - .emacs @@ -167,6 +189,8 @@ Gentoo Eclass: - .eclass Gettext Catalog: :lexer: pot + 
:aliases: + - pot :ext: - .po - .pot @@ -212,6 +236,8 @@ INI: - .properties IRC log: :lexer: irc + :aliases: + - irc :ext: - .weechatlog Io: @@ -226,6 +252,8 @@ Groovy: :ext: - .groovy JavaScript: + :aliases: + - js :ext: - .js - .sjs @@ -240,6 +268,8 @@ JSON: - .json Literate Haskell: :lexer: lhs + :aliases: + - lhs :ext: - .lhs LLVM: @@ -326,6 +356,8 @@ PHP: - .php5 Parrot Internal Representation: :lexer: pir + :aliases: + - pir :ext: - .pir - .pbc @@ -349,6 +381,8 @@ Python: - .pyw Python traceback: :lexer: pytb + :aliases: + - pytb :ext: - .pytb R: @@ -365,6 +399,8 @@ RHTML: - .rhtml Raw token data: :lexer: raw + :aliases: + - raw :ext: - .raw Rebol: @@ -412,6 +448,10 @@ Self: Shell: :lexer: bash :default_lexer: true + :aliases: + - sh + - bash + - zsh :ext: - .sh - .zsh @@ -457,6 +497,8 @@ Verilog: - .v VimL: :lexer: vim + :aliases: + - vim :ext: - .vim - .vimrc @@ -493,6 +535,8 @@ YAML: - .yaml Java Server Pages: :lexer: jsp + :aliases: + - jsp :ext: - .jsp mupad: @@ -503,6 +547,8 @@ ooc: - .ooc reStructuredText: :lexer: rst + :aliases: + - rst :ext: - .rst - .rest diff --git a/lib/linguist/shebangs.yml b/lib/linguist/shebangs.yml index fe19e27d..e45ba91f 100644 --- a/lib/linguist/shebangs.yml +++ b/lib/linguist/shebangs.yml @@ -17,9 +17,3 @@ JavaScript: Ruby: - macruby - rake - -# Other shells -Shell: -- bash -- sh -- zsh diff --git a/test/test_language.rb b/test/test_language.rb index 3dd86788..933566b3 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -15,6 +15,82 @@ class TestLanguage < Test::Unit::TestCase def test_find_all_by_name Language.all.each do |language| assert_equal language, Language.find_by_name(language.name) + assert_equal language, Language[language.name] + end + end + + def test_find_by_alias + assert_equal Language['Perl'], Language.find_by_alias('perl') + assert_equal Language['Python'], Language.find_by_alias('python') + assert_equal Language['Ruby'], Language.find_by_alias('ruby') + assert_equal Language['HTML+ERB'], 
Language.find_by_alias('html+erb') + assert_equal Language['Max/MSP'], Language.find_by_alias('max/msp') + assert_equal Language['Pure Data'], Language.find_by_alias('pure-data') + + assert_equal Language['ASP'], Language.find_by_alias('asp') + assert_equal Language['ASP'], Language.find_by_alias('aspx') + assert_equal Language['ASP'], Language.find_by_alias('aspx-vb') + assert_equal Language['ActionScript'], Language.find_by_alias('as3') + assert_equal Language['Assembly'], Language.find_by_alias('nasm') + assert_equal Language['Batchfile'], Language.find_by_alias('bat') + + assert_equal Language['C++'], Language.find_by_alias('c++') + assert_equal Language['C++'], Language.find_by_alias('cpp') + assert_equal Language['C#'], Language.find_by_alias('c#') + assert_equal Language['C#'], Language.find_by_alias('csharp') + + assert_equal Language['Java'], Language.find_by_alias('java') + assert_equal Language['ChucK'], Language.find_by_alias('chuck') + assert_equal Language['Groovy'], Language.find_by_alias('groovy') + assert_equal Language['Java Server Pages'], Language.find_by_alias('jsp') + + assert_equal Language['ColdFusion'], Language.find_by_alias('cfm') + assert_equal Language['Darcs Patch'], Language.find_by_alias('dpatch') + + assert_equal Language['Common Lisp'], Language.find_by_alias('common-lisp') + assert_equal Language['Common Lisp'], Language.find_by_alias('lisp') + assert_equal Language['Emacs Lisp'], Language.find_by_alias('emacs-lisp') + assert_equal Language['Emacs Lisp'], Language.find_by_alias('elisp') + assert_equal Language['Nu'], Language.find_by_alias('nu') + assert_equal Language['Scheme'], Language.find_by_alias('scheme') + + assert_equal Language['OCaml'], Language.find_by_alias('ocaml') + assert_equal Language['F#'], Language.find_by_alias('f#') + assert_equal Language['Gettext Catalog'], Language.find_by_alias('pot') + assert_equal Language['IRC log'], Language.find_by_alias('irc') + + assert_equal Language['JavaScript'], 
Language.find_by_alias('javascript') + assert_equal Language['JavaScript'], Language.find_by_alias('js') + assert_equal Language['JSON'], Language.find_by_alias('json') + + assert_equal Language['Haskell'], Language.find_by_alias('haskell') + assert_equal Language['Literate Haskell'], Language.find_by_alias('literate-haskell') + assert_equal Language['Literate Haskell'], Language.find_by_alias('lhs') + + assert_equal Language['Parrot Internal Representation'], Language.find_by_alias('pir') + + assert_equal Language['Python traceback'], Language.find_by_alias('pytb') + + assert_equal Language['Raw token data'], Language.find_by_alias('raw') + assert_equal Language['reStructuredText'], Language.find_by_alias('rst') + + assert_equal Language['Shell'], Language.find_by_alias('shell') + assert_equal Language['Shell'], Language.find_by_alias('sh') + assert_equal Language['Shell'], Language.find_by_alias('bash') + assert_equal Language['Shell'], Language.find_by_alias('zsh') + + assert_equal Language['VimL'], Language.find_by_alias('viml') + assert_equal Language['VimL'], Language.find_by_alias('vim') + + assert_equal Language['XS'], Language.find_by_alias('xs') + end + + def test_find_all_by_alias + Language.all.each do |language| + language.aliases.each do |name| + assert_equal language, Language.find_by_alias(name) + assert_equal language, Language[name] + end end end @@ -32,20 +108,6 @@ class TestLanguage < Test::Unit::TestCase end end - def test_find_by_lexer - assert_equal Language['C'], Language.find_by_lexer('c') - assert_equal Language['C++'], Language.find_by_lexer('cpp') - assert_equal Language['Java'], Language.find_by_lexer('java') - assert_equal Language['JavaScript'], Language.find_by_lexer('javascript') - assert_equal Language['OCaml'], Language.find_by_lexer('ocaml') - assert_equal Language['Perl'], Language.find_by_lexer('perl') - assert_equal Language['Python'], Language.find_by_lexer('python') - assert_equal Language['Ruby'], 
Language.find_by_lexer('ruby') - assert_equal Language['Scheme'], Language.find_by_lexer('scheme') - assert_equal Language['Shell'], Language.find_by_lexer('bash') - assert_equal Language['Text'], Language.find_by_lexer('kt') - end - def test_find assert_equal "Ruby", Language['Ruby'].name assert_equal "Ruby", Language['ruby'].name