Add language aliases

This commit is contained in:
Joshua Peek
2011-05-29 19:27:51 -05:00
parent bb078bf063
commit ba7c2c759a
5 changed files with 173 additions and 49 deletions

View File

@@ -369,7 +369,7 @@ module Linguist
if lang = Shebangs[script] if lang = Shebangs[script]
lang lang
else else
lang = Language.find_by_lexer(script) lang = Language[script]
lang != Language['Text'] ? lang : nil lang != Language['Text'] ? lang : nil
end end
end end

View File

@@ -6,6 +6,7 @@ module Linguist
class Language class Language
@languages = [] @languages = []
@name_index = {} @name_index = {}
@alias_index = {}
@lexer_index = {} @lexer_index = {}
@extension_index = {} @extension_index = {}
@@ -27,6 +28,16 @@ module Linguist
# Case-insensitive language name index # Case-insensitive language name index
@name_index[language.name.downcase] = language @name_index[language.name.downcase] = language
language.aliases.each do |name|
# All Language aliases should be unique. Warn if there is a duplicate.
if @alias_index.key?(name)
warn "Duplicate alias: #{name}"
end
@alias_index[name] = language
end
# Set language as the default for reverse lexer lookup if # Set language as the default for reverse lexer lookup if
# :default_lexer is set or the language is the same name as its # :default_lexer is set or the language is the same name as its
# lexer. # lexer.
@@ -75,6 +86,20 @@ module Linguist
@name_index[name.downcase] @name_index[name.downcase]
end end
# Public: Look up Language by one of its aliases.
#
# name - A case-insensitive String alias of the Language
#
# Examples
#
# Language.find_by_alias('cpp')
# # => #<Language name="C++">
#
# Returns the Language or nil if none was found.
def self.find_by_alias(name)
@alias_index[name.downcase]
end
# Public: Look up Language by extension. # Public: Look up Language by extension.
# #
# extension - The extension String. May include leading "." # extension - The extension String. May include leading "."
@@ -89,23 +114,6 @@ module Linguist
@extension_index[extension] @extension_index[extension]
end end
# Deprecated: Look up Language by its lexer.
#
# The use of this method is discouraged since multiple languages
# may have the same lexer name.
#
# name - The case-insensitive String lexer of the Language
#
# Examples
#
# Language.find_by_lexer('cpp')
# # => #<Language name="C++">
#
# Returns the Language or Language['Text'] if none was found.
def self.find_by_lexer(lexer)
@lexer_index[lexer.downcase] || self['Text']
end
# Public: Look up Language by its name or lexer. # Public: Look up Language by its name or lexer.
# #
# name - The case-insensitive String name of the Language # name - The case-insensitive String name of the Language
@@ -120,7 +128,7 @@ module Linguist
# #
# Returns the Language or nil if none was found. # Returns the Language or nil if none was found.
def self.[](name) def self.[](name)
find_by_name(name) || find_by_lexer(name) find_by_name(name) || find_by_alias(name) || self['Text']
end end
# Public: A List of popular languages # Public: A List of popular languages
@@ -154,8 +162,11 @@ module Linguist
# @name is required # @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name") @name = attributes[:name] || raise(ArgumentError, "missing name")
# Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || [])
# Use :lexer_name or fallback to `@name.downcase` # Use :lexer_name or fallback to `@name.downcase`
@lexer_name = attributes[:lexer_name] || default_lexer_name @lexer_name = attributes[:lexer_name] || default_alias_name
# Lookup Lexer object # Lookup Lexer object
@lexer = Lexer.find_by_alias(@lexer_name) @lexer = Lexer.find_by_alias(@lexer_name)
@@ -180,6 +191,16 @@ module Linguist
# Returns the name String # Returns the name String
attr_reader :name attr_reader :name
# Public: Get aliases
#
# Examples
#
# Language['C++'].aliases
# # => ["cpp"]
#
# Returns an Array of String names
attr_reader :aliases
# Deprecated: Get lexer name # Deprecated: Get lexer name
# #
# Examples # Examples
@@ -205,10 +226,10 @@ module Linguist
# Returns the extensions Array # Returns the extensions Array
attr_reader :extensions attr_reader :extensions
# Internal: Get default lexer name # Internal: Get default alias name
# #
# Returns the lexer name String # Returns the alias name String
def default_lexer_name def default_alias_name
name.downcase.gsub(/\s/, '-') name.downcase.gsub(/\s/, '-')
end end
@@ -216,7 +237,7 @@ module Linguist
# #
# Returns true or false # Returns true or false
def default_lexer? def default_lexer?
lexer_name == default_lexer_name lexer_name == default_alias_name
end end
def search_term def search_term
@@ -286,12 +307,13 @@ module Linguist
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Language.create( Language.create(
:name => name, :name => name,
:aliases => options[:aliases],
:lexer_name => options[:lexer], :lexer_name => options[:lexer],
:default_lexer => options[:default_lexer], :default_lexer => options[:default_lexer],
:extensions => options[:ext], :extensions => options[:ext],
:popular => popular.include?(name), :popular => popular.include?(name),
:common => common.include?(name) :common => common.include?(name)
) )
end end
end end

View File

@@ -1,10 +1,14 @@
# Defines all Languages known to GitHub # Defines all Languages known to GitHub
# #
# lexer - An explicit lexer String (defaults to name.downcase) # lexer - An explicit lexer String (defaults to name.downcase)
# ext - An Array of associated extensions # aliases - An Array of additional aliases (implicitly includes name.downcase)
# ext - An Array of associated extensions
ASP: ASP:
:lexer: aspx-vb :lexer: aspx-vb
:aliases:
- aspx
- aspx-vb
:ext: :ext:
- .ascx - .ascx
- .axd - .axd
@@ -15,6 +19,8 @@ ASP:
- .asp - .asp
ActionScript: ActionScript:
:lexer: as3 :lexer: as3
:aliases:
- as3
:ext: :ext:
- .as - .as
Ada: Ada:
@@ -29,10 +35,14 @@ Arc:
- .arc - .arc
Assembly: Assembly:
:lexer: nasm :lexer: nasm
:aliases:
- nasm
:ext: :ext:
- .asm - .asm
Batchfile: Batchfile:
:lexer: bat :lexer: bat
:aliases:
- bat
:ext: :ext:
- .bat - .bat
- .cmd - .cmd
@@ -55,10 +65,14 @@ C:
- .h - .h
C#: C#:
:lexer: csharp :lexer: csharp
:aliases:
- csharp
:ext: :ext:
- .cs - .cs
C++: C++:
:lexer: cpp :lexer: cpp
:aliases:
- cpp
:ext: :ext:
- .cpp - .cpp
- .hh - .hh
@@ -85,10 +99,14 @@ CoffeeScript:
- Cakefile - Cakefile
ColdFusion: ColdFusion:
:lexer: cfm :lexer: cfm
:aliases:
- cfm
:ext: :ext:
- .cfm - .cfm
- .cfc - .cfc
Common Lisp: Common Lisp:
:aliases:
- lisp
:ext: :ext:
- .cl - .cl
- .lisp - .lisp
@@ -107,6 +125,8 @@ D:
- .di - .di
Darcs Patch: Darcs Patch:
:lexer: dpatch :lexer: dpatch
:aliases:
- dpatch
:ext: :ext:
- .darcspatch - .darcspatch
- .dpatch - .dpatch
@@ -128,6 +148,8 @@ Eiffel:
- .e - .e
Emacs Lisp: Emacs Lisp:
:lexer: scheme :lexer: scheme
:aliases:
- elisp
:ext: :ext:
- .el - .el
- .emacs - .emacs
@@ -167,6 +189,8 @@ Gentoo Eclass:
- .eclass - .eclass
Gettext Catalog: Gettext Catalog:
:lexer: pot :lexer: pot
:aliases:
- pot
:ext: :ext:
- .po - .po
- .pot - .pot
@@ -212,6 +236,8 @@ INI:
- .properties - .properties
IRC log: IRC log:
:lexer: irc :lexer: irc
:aliases:
- irc
:ext: :ext:
- .weechatlog - .weechatlog
Io: Io:
@@ -226,6 +252,8 @@ Groovy:
:ext: :ext:
- .groovy - .groovy
JavaScript: JavaScript:
:aliases:
- js
:ext: :ext:
- .js - .js
- .sjs - .sjs
@@ -240,6 +268,8 @@ JSON:
- .json - .json
Literate Haskell: Literate Haskell:
:lexer: lhs :lexer: lhs
:aliases:
- lhs
:ext: :ext:
- .lhs - .lhs
LLVM: LLVM:
@@ -326,6 +356,8 @@ PHP:
- .php5 - .php5
Parrot Internal Representation: Parrot Internal Representation:
:lexer: pir :lexer: pir
:aliases:
- pir
:ext: :ext:
- .pir - .pir
- .pbc - .pbc
@@ -349,6 +381,8 @@ Python:
- .pyw - .pyw
Python traceback: Python traceback:
:lexer: pytb :lexer: pytb
:aliases:
- pytb
:ext: :ext:
- .pytb - .pytb
R: R:
@@ -365,6 +399,8 @@ RHTML:
- .rhtml - .rhtml
Raw token data: Raw token data:
:lexer: raw :lexer: raw
:aliases:
- raw
:ext: :ext:
- .raw - .raw
Rebol: Rebol:
@@ -412,6 +448,10 @@ Self:
Shell: Shell:
:lexer: bash :lexer: bash
:default_lexer: true :default_lexer: true
:aliases:
- sh
- bash
- zsh
:ext: :ext:
- .sh - .sh
- .zsh - .zsh
@@ -457,6 +497,8 @@ Verilog:
- .v - .v
VimL: VimL:
:lexer: vim :lexer: vim
:aliases:
- vim
:ext: :ext:
- .vim - .vim
- .vimrc - .vimrc
@@ -493,6 +535,8 @@ YAML:
- .yaml - .yaml
Java Server Pages: Java Server Pages:
:lexer: jsp :lexer: jsp
:aliases:
- jsp
:ext: :ext:
- .jsp - .jsp
mupad: mupad:
@@ -503,6 +547,8 @@ ooc:
- .ooc - .ooc
reStructuredText: reStructuredText:
:lexer: rst :lexer: rst
:aliases:
- rst
:ext: :ext:
- .rst - .rst
- .rest - .rest

View File

@@ -17,9 +17,3 @@ JavaScript:
Ruby: Ruby:
- macruby - macruby
- rake - rake
# Other shells
Shell:
- bash
- sh
- zsh

View File

@@ -15,6 +15,82 @@ class TestLanguage < Test::Unit::TestCase
def test_find_all_by_name def test_find_all_by_name
Language.all.each do |language| Language.all.each do |language|
assert_equal language, Language.find_by_name(language.name) assert_equal language, Language.find_by_name(language.name)
assert_equal language, Language[language.name]
end
end
def test_find_by_alias
assert_equal Language['Perl'], Language.find_by_alias('perl')
assert_equal Language['Python'], Language.find_by_alias('python')
assert_equal Language['Ruby'], Language.find_by_alias('ruby')
assert_equal Language['HTML+ERB'], Language.find_by_alias('html+erb')
assert_equal Language['Max/MSP'], Language.find_by_alias('max/msp')
assert_equal Language['Pure Data'], Language.find_by_alias('pure-data')
assert_equal Language['ASP'], Language.find_by_alias('asp')
assert_equal Language['ASP'], Language.find_by_alias('aspx')
assert_equal Language['ASP'], Language.find_by_alias('aspx-vb')
assert_equal Language['ActionScript'], Language.find_by_alias('as3')
assert_equal Language['Assembly'], Language.find_by_alias('nasm')
assert_equal Language['Batchfile'], Language.find_by_alias('bat')
assert_equal Language['C++'], Language.find_by_alias('c++')
assert_equal Language['C++'], Language.find_by_alias('cpp')
assert_equal Language['C#'], Language.find_by_alias('c#')
assert_equal Language['C#'], Language.find_by_alias('csharp')
assert_equal Language['Java'], Language.find_by_alias('java')
assert_equal Language['ChucK'], Language.find_by_alias('chuck')
assert_equal Language['Groovy'], Language.find_by_alias('groovy')
assert_equal Language['Java Server Pages'], Language.find_by_alias('jsp')
assert_equal Language['ColdFusion'], Language.find_by_alias('cfm')
assert_equal Language['Darcs Patch'], Language.find_by_alias('dpatch')
assert_equal Language['Common Lisp'], Language.find_by_alias('common-lisp')
assert_equal Language['Common Lisp'], Language.find_by_alias('lisp')
assert_equal Language['Emacs Lisp'], Language.find_by_alias('emacs-lisp')
assert_equal Language['Emacs Lisp'], Language.find_by_alias('elisp')
assert_equal Language['Nu'], Language.find_by_alias('nu')
assert_equal Language['Scheme'], Language.find_by_alias('scheme')
assert_equal Language['OCaml'], Language.find_by_alias('ocaml')
assert_equal Language['F#'], Language.find_by_alias('f#')
assert_equal Language['Gettext Catalog'], Language.find_by_alias('pot')
assert_equal Language['IRC log'], Language.find_by_alias('irc')
assert_equal Language['JavaScript'], Language.find_by_alias('javascript')
assert_equal Language['JavaScript'], Language.find_by_alias('js')
assert_equal Language['JSON'], Language.find_by_alias('json')
assert_equal Language['Haskell'], Language.find_by_alias('haskell')
assert_equal Language['Literate Haskell'], Language.find_by_alias('literate-haskell')
assert_equal Language['Literate Haskell'], Language.find_by_alias('lhs')
assert_equal Language['Parrot Internal Representation'], Language.find_by_alias('pir')
assert_equal Language['Python traceback'], Language.find_by_alias('pytb')
assert_equal Language['Raw token data'], Language.find_by_alias('raw')
assert_equal Language['reStructuredText'], Language.find_by_alias('rst')
assert_equal Language['Shell'], Language.find_by_alias('shell')
assert_equal Language['Shell'], Language.find_by_alias('sh')
assert_equal Language['Shell'], Language.find_by_alias('bash')
assert_equal Language['Shell'], Language.find_by_alias('zsh')
assert_equal Language['VimL'], Language.find_by_alias('viml')
assert_equal Language['VimL'], Language.find_by_alias('vim')
assert_equal Language['XS'], Language.find_by_alias('xs')
end
def test_find_all_by_alias
Language.all.each do |language|
language.aliases.each do |name|
assert_equal language, Language.find_by_alias(name)
assert_equal language, Language[name]
end
end end
end end
@@ -32,20 +108,6 @@ class TestLanguage < Test::Unit::TestCase
end end
end end
def test_find_by_lexer
assert_equal Language['C'], Language.find_by_lexer('c')
assert_equal Language['C++'], Language.find_by_lexer('cpp')
assert_equal Language['Java'], Language.find_by_lexer('java')
assert_equal Language['JavaScript'], Language.find_by_lexer('javascript')
assert_equal Language['OCaml'], Language.find_by_lexer('ocaml')
assert_equal Language['Perl'], Language.find_by_lexer('perl')
assert_equal Language['Python'], Language.find_by_lexer('python')
assert_equal Language['Ruby'], Language.find_by_lexer('ruby')
assert_equal Language['Scheme'], Language.find_by_lexer('scheme')
assert_equal Language['Shell'], Language.find_by_lexer('bash')
assert_equal Language['Text'], Language.find_by_lexer('kt')
end
def test_find def test_find
assert_equal "Ruby", Language['Ruby'].name assert_equal "Ruby", Language['Ruby'].name
assert_equal "Ruby", Language['ruby'].name assert_equal "Ruby", Language['ruby'].name