Add language aliases

This commit is contained in:
Joshua Peek
2011-05-29 19:27:51 -05:00
parent bb078bf063
commit ba7c2c759a
5 changed files with 173 additions and 49 deletions

View File

@@ -369,7 +369,7 @@ module Linguist
if lang = Shebangs[script]
lang
else
lang = Language.find_by_lexer(script)
lang = Language[script]
lang != Language['Text'] ? lang : nil
end
end

View File

@@ -6,6 +6,7 @@ module Linguist
class Language
@languages = []
@name_index = {}
@alias_index = {}
@lexer_index = {}
@extension_index = {}
@@ -27,6 +28,16 @@ module Linguist
# Case-insensitive language name index
@name_index[language.name.downcase] = language
language.aliases.each do |name|
# All Language aliases should be unique. Warn if there is a duplicate.
if @alias_index.key?(name)
warn "Duplicate alias: #{name}"
end
@alias_index[name] = language
end
# Set language as the default for reverse lexer lookup if
# :default_lexer is set or the language is the same name as its
# lexer.
@@ -75,6 +86,20 @@ module Linguist
@name_index[name.downcase]
end
# Public: Look up Language by one of its aliases.
#
# name - The String alias of the Language. The alias is matched exactly
#        as given first; if nothing is registered under that spelling,
#        the lookup is retried with the alias downcased.
#
# Examples
#
#   Language.find_by_alias('cpp')
#   # => #<Language name="C++">
#
# Returns the Language or nil if none was found.
def self.find_by_alias(name)
  # Try the verbatim key before the lowercase form so that an alias
  # stored with capital letters is still reachable by its own spelling.
  @alias_index[name] || @alias_index[name.downcase]
end
# Public: Look up Language by extension.
#
# extension - The extension String. May include leading "."
@@ -89,23 +114,6 @@ module Linguist
@extension_index[extension]
end
# Deprecated: Look up Language by its lexer.
#
# Several languages may share one lexer name, so this reverse lookup is
# ambiguous and its use is discouraged.
#
# lexer - The case-insensitive String lexer name of the Language
#
# Examples
#
#   Language.find_by_lexer('cpp')
#   # => #<Language name="C++">
#
# Returns the Language or Language['Text'] if none was found.
def self.find_by_lexer(lexer)
  match = @lexer_index[lexer.downcase]
  return match if match

  # No language registered for this lexer: use the Text language.
  self['Text']
end
# Public: Look up Language by its name or one of its aliases.
#
# name - The case-insensitive String name of the Language
@@ -120,7 +128,7 @@ module Linguist
#
# Returns the Language or Language['Text'] if none was found.
def self.[](name)
find_by_name(name) || find_by_lexer(name)
find_by_name(name) || find_by_alias(name) || self['Text']
end
# Public: A List of popular languages
@@ -154,8 +162,11 @@ module Linguist
# @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name")
# Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || [])
# Use :lexer_name or fall back to the name downcased with whitespace replaced by "-"
@lexer_name = attributes[:lexer_name] || default_lexer_name
@lexer_name = attributes[:lexer_name] || default_alias_name
# Lookup Lexer object
@lexer = Lexer.find_by_alias(@lexer_name)
@@ -180,6 +191,16 @@ module Linguist
# Returns the name String
attr_reader :name
# Public: Get aliases
#
# The list starts with the default alias derived from the name (the name
# downcased, whitespace replaced by "-"), followed by any explicitly
# configured aliases.
#
# Examples
#
#   Language['C++'].aliases
#   # => ["c++", "cpp"]
#
# Returns an Array of String names
attr_reader :aliases
# Deprecated: Get lexer name
#
# Examples
@@ -205,10 +226,10 @@ module Linguist
# Returns the extensions Array
attr_reader :extensions
# Internal: Get default lexer name
# Internal: Get default alias name
#
# Returns the lexer name String
def default_lexer_name
# Returns the alias name String
def default_alias_name
name.downcase.gsub(/\s/, '-')
end
@@ -216,7 +237,7 @@ module Linguist
#
# Returns true or false
def default_lexer?
lexer_name == default_lexer_name
lexer_name == default_alias_name
end
def search_term
@@ -287,6 +308,7 @@ module Linguist
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Language.create(
:name => name,
:aliases => options[:aliases],
:lexer_name => options[:lexer],
:default_lexer => options[:default_lexer],
:extensions => options[:ext],

View File

@@ -1,10 +1,14 @@
# Defines all Languages known to GitHub
#
# lexer - An explicit lexer String (defaults to name.downcase with whitespace replaced by "-")
# aliases - An Array of additional aliases (implicitly includes name.downcase with whitespace replaced by "-")
# ext - An Array of associated extensions
ASP:
:lexer: aspx-vb
:aliases:
- aspx
- aspx-vb
:ext:
- .ascx
- .axd
@@ -15,6 +19,8 @@ ASP:
- .asp
ActionScript:
:lexer: as3
:aliases:
- as3
:ext:
- .as
Ada:
@@ -29,10 +35,14 @@ Arc:
- .arc
Assembly:
:lexer: nasm
:aliases:
- nasm
:ext:
- .asm
Batchfile:
:lexer: bat
:aliases:
- bat
:ext:
- .bat
- .cmd
@@ -55,10 +65,14 @@ C:
- .h
C#:
:lexer: csharp
:aliases:
- csharp
:ext:
- .cs
C++:
:lexer: cpp
:aliases:
- cpp
:ext:
- .cpp
- .hh
@@ -85,10 +99,14 @@ CoffeeScript:
- Cakefile
ColdFusion:
:lexer: cfm
:aliases:
- cfm
:ext:
- .cfm
- .cfc
Common Lisp:
:aliases:
- lisp
:ext:
- .cl
- .lisp
@@ -107,6 +125,8 @@ D:
- .di
Darcs Patch:
:lexer: dpatch
:aliases:
- dpatch
:ext:
- .darcspatch
- .dpatch
@@ -128,6 +148,8 @@ Eiffel:
- .e
Emacs Lisp:
:lexer: scheme
:aliases:
- elisp
:ext:
- .el
- .emacs
@@ -167,6 +189,8 @@ Gentoo Eclass:
- .eclass
Gettext Catalog:
:lexer: pot
:aliases:
- pot
:ext:
- .po
- .pot
@@ -212,6 +236,8 @@ INI:
- .properties
IRC log:
:lexer: irc
:aliases:
- irc
:ext:
- .weechatlog
Io:
@@ -226,6 +252,8 @@ Groovy:
:ext:
- .groovy
JavaScript:
:aliases:
- js
:ext:
- .js
- .sjs
@@ -240,6 +268,8 @@ JSON:
- .json
Literate Haskell:
:lexer: lhs
:aliases:
- lhs
:ext:
- .lhs
LLVM:
@@ -326,6 +356,8 @@ PHP:
- .php5
Parrot Internal Representation:
:lexer: pir
:aliases:
- pir
:ext:
- .pir
- .pbc
@@ -349,6 +381,8 @@ Python:
- .pyw
Python traceback:
:lexer: pytb
:aliases:
- pytb
:ext:
- .pytb
R:
@@ -365,6 +399,8 @@ RHTML:
- .rhtml
Raw token data:
:lexer: raw
:aliases:
- raw
:ext:
- .raw
Rebol:
@@ -412,6 +448,10 @@ Self:
Shell:
:lexer: bash
:default_lexer: true
:aliases:
- sh
- bash
- zsh
:ext:
- .sh
- .zsh
@@ -457,6 +497,8 @@ Verilog:
- .v
VimL:
:lexer: vim
:aliases:
- vim
:ext:
- .vim
- .vimrc
@@ -493,6 +535,8 @@ YAML:
- .yaml
Java Server Pages:
:lexer: jsp
:aliases:
- jsp
:ext:
- .jsp
mupad:
@@ -503,6 +547,8 @@ ooc:
- .ooc
reStructuredText:
:lexer: rst
:aliases:
- rst
:ext:
- .rst
- .rest

View File

@@ -17,9 +17,3 @@ JavaScript:
Ruby:
- macruby
- rake
# Other shells
Shell:
- bash
- sh
- zsh

View File

@@ -15,6 +15,82 @@ class TestLanguage < Test::Unit::TestCase
# Every registered language must be retrievable by its own name, both
# through find_by_name and through the Language[] shortcut.
def test_find_all_by_name
  Language.all.each do |expected|
    language_name = expected.name
    assert_equal expected, Language.find_by_name(language_name)
    assert_equal expected, Language[language_name]
  end
end
# Checks a fixed table of language names against the aliases that must
# resolve to them. Each entry is [language name, aliases]; the aliases
# are checked in the order listed.
def test_find_by_alias
  expectations = [
    ['Perl',                           ['perl']],
    ['Python',                         ['python']],
    ['Ruby',                           ['ruby']],
    ['HTML+ERB',                       ['html+erb']],
    ['Max/MSP',                        ['max/msp']],
    ['Pure Data',                      ['pure-data']],
    ['ASP',                            ['asp', 'aspx', 'aspx-vb']],
    ['ActionScript',                   ['as3']],
    ['Assembly',                       ['nasm']],
    ['Batchfile',                      ['bat']],
    ['C++',                            ['c++', 'cpp']],
    ['C#',                             ['c#', 'csharp']],
    ['Java',                           ['java']],
    ['ChucK',                          ['chuck']],
    ['Groovy',                         ['groovy']],
    ['Java Server Pages',              ['jsp']],
    ['ColdFusion',                     ['cfm']],
    ['Darcs Patch',                    ['dpatch']],
    ['Common Lisp',                    ['common-lisp', 'lisp']],
    ['Emacs Lisp',                     ['emacs-lisp', 'elisp']],
    ['Nu',                             ['nu']],
    ['Scheme',                         ['scheme']],
    ['OCaml',                          ['ocaml']],
    ['F#',                             ['f#']],
    ['Gettext Catalog',                ['pot']],
    ['IRC log',                        ['irc']],
    ['JavaScript',                     ['javascript', 'js']],
    ['JSON',                           ['json']],
    ['Haskell',                        ['haskell']],
    ['Literate Haskell',               ['literate-haskell', 'lhs']],
    ['Parrot Internal Representation', ['pir']],
    ['Python traceback',               ['pytb']],
    ['Raw token data',                 ['raw']],
    ['reStructuredText',               ['rst']],
    ['Shell',                          ['shell', 'sh', 'bash', 'zsh']],
    ['VimL',                           ['viml', 'vim']],
    ['XS',                             ['xs']]
  ]

  expectations.each do |language_name, alias_names|
    alias_names.each do |alias_name|
      assert_equal Language[language_name], Language.find_by_alias(alias_name)
    end
  end
end
# Every alias of every registered language must resolve back to that
# language, both through find_by_alias and through Language[].
def test_find_all_by_alias
  Language.all.each do |expected|
    expected.aliases.each do |alias_name|
      assert_equal expected, Language.find_by_alias(alias_name)
      assert_equal expected, Language[alias_name]
    end
  end
end
@@ -32,20 +108,6 @@ class TestLanguage < Test::Unit::TestCase
end
end
# Checks the reverse lexer lookup against a fixed table of
# [language name, lexer name] pairs. The final pair uses a lexer name
# that is expected to resolve to the Text language.
def test_find_by_lexer
  expectations = [
    ['C',          'c'],
    ['C++',        'cpp'],
    ['Java',       'java'],
    ['JavaScript', 'javascript'],
    ['OCaml',      'ocaml'],
    ['Perl',       'perl'],
    ['Python',     'python'],
    ['Ruby',       'ruby'],
    ['Scheme',     'scheme'],
    ['Shell',      'bash'],
    ['Text',       'kt']
  ]

  expectations.each do |language_name, lexer_name|
    assert_equal Language[language_name], Language.find_by_lexer(lexer_name)
  end
end
def test_find
assert_equal "Ruby", Language['Ruby'].name
assert_equal "Ruby", Language['ruby'].name