Merge branch 'master' of git://github.com/github/linguist into detect-sbt-as-scala-lang

This commit is contained in:
softprops
2011-07-05 22:21:29 -04:00
14 changed files with 478 additions and 195 deletions

View File

@@ -6,7 +6,7 @@ We use this library at GitHub to detect blob languages, highlight code, ignore b
### Language detection ### Language detection
Linguist defines the list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be hightlighed, a language and lexer must be defined there. Linguist defines the list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be highlighted, a language and lexer must be defined there.
Most languages are detected by their file extension. This is the fastest and most common situation. For script files, which are usually extensionless, we do "deep content inspection"™ and check the shebang of the file. Checking the file's contents may also be used for disambiguating languages. C, C++ and Obj-C all use `.h` files. Looking for common keywords, we are usually able to guess the correct language. Most languages are detected by their file extension. This is the fastest and most common situation. For script files, which are usually extensionless, we do "deep content inspection"™ and check the shebang of the file. Checking the file's contents may also be used for disambiguating languages. C, C++ and Obj-C all use `.h` files. Looking for common keywords, we are usually able to guess the correct language.

View File

@@ -275,15 +275,15 @@ module Linguist
def guess_language def guess_language
return if binary? return if binary?
# If its a header file (.h) try to guess the language # Disambiguate between multiple language extensions
header_language || disambiguate_extension_language ||
# If it's a .r file, try to guess the language
r_language ||
# See if there is a Language for the extension # See if there is a Language for the extension
pathname.language || pathname.language ||
# Look for idioms in first line
first_line_language ||
# Try to detect Language from shebang line # Try to detect Language from shebang line
shebang_language shebang_language
end end
@@ -295,12 +295,22 @@ module Linguist
language ? language.lexer : Lexer['Text only'] language ? language.lexer : Lexer['Text only']
end end
# Internal: Pick a Language for an extension shared by several Languages.
#
# Builds the method name "guess_EXTENSION_language" from the blob's
# extension (leading "." removed) and calls it when such a method exists.
#
# Returns a Language, or nil when the extension is not ambiguous or no
# matching guess method is available.
def disambiguate_extension_language
  return unless Language.ambiguous?(extname)

  guesser = "guess_#{extname.sub(/^\./, '')}_language"
  send(guesser) if respond_to?(guesser)
end
# Internal: Guess language of header files (.h). # Internal: Guess language of header files (.h).
# #
# Returns a Language. # Returns a Language.
def header_language def guess_h_language
return unless extname == '.h'
if lines.grep(/^@(interface|property|private|public|end)/).any? if lines.grep(/^@(interface|property|private|public|end)/).any?
Language['Objective-C'] Language['Objective-C']
elsif lines.grep(/^class |^\s+(public|protected|private):/).any? elsif lines.grep(/^class |^\s+(public|protected|private):/).any?
@@ -310,12 +320,58 @@ module Linguist
end end
end end
# Internal: Guess language of .m files.
#
# Checks are applied in this order:
#
# 1. Any line with "#import" at its start, or containing one of the
#    "@" keywords below, means Objective-C.
# 2. A first line starting with "function " means Matlab.
# 3. Any line starting with "%" (a Matlab comment) means Matlab.
# 4. Otherwise Objective-C, to avoid Matlab false positives.
#
# Returns a Language.
def guess_m_language
  objc_markers = /^#import|@(interface|implementation|property|synthesize|end)/
  return Language['Objective-C'] if lines.grep(objc_markers).any?

  # The "%" scan only runs when the first line is not a function header.
  matlab = lines.first.to_s =~ /^function / || lines.grep(/^%/).any?
  Language[matlab ? 'Matlab' : 'Objective-C']
end
# Internal: Guess language of .pl files.
#
# Decision order:
#
# 1. A shebang naming "perl" settles it as Perl.
# 2. Otherwise any line containing ":-" (the Prolog rule operator)
#    means Prolog.
# 3. Everything else defaults to Perl, the more popular of the two.
#
# Returns a Language.
def guess_pl_language
  # Lines are only scanned when the shebang did not already say Perl.
  prolog = shebang_script != 'perl' && lines.grep(/:-/).any?
  Language[prolog ? 'Prolog' : 'Perl']
end
# Internal: Guess language of .r files. # Internal: Guess language of .r files.
# #
# Returns a Language. # Returns a Language.
def r_language def guess_r_language
return unless extname == '.r'
if lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any? if lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any?
Language['Rebol'] Language['Rebol']
else else
@@ -323,6 +379,20 @@ module Linguist
end end
end end
# Internal: Guess language from the first line.
#
# Only one idiom is recognised: a first line starting with "<?php"
# means PHP. Blobs that are not viewable are never inspected.
#
# Returns a Language, or nil when the blob is not viewable or the first
# line does not match.
def first_line_language
  Language['PHP'] if viewable? && lines.first.to_s =~ /^<\?php/
end
# Internal: Extract the script name from the shebang line # Internal: Extract the script name from the shebang line
# #
# Requires Blob#data # Requires Blob#data
@@ -399,5 +469,12 @@ module Linguist
return if !text? || large? return if !text? || large?
lexer.colorize_without_wrapper(data) lexer.colorize_without_wrapper(data)
end end
# Load-time sanity check: each extension that Language reports as
# overridden should have a matching "guess_EXTENSION_language" method
# defined here; warn about any that is missing.
Language.overridden_extensions.each do |extension|
  name = "guess_#{extension.sub(/^\./, '')}_language"
  # method_defined? accepts a String or Symbol on every Ruby version.
  # instance_methods.include?(name) is always false on Ruby >= 1.9, where
  # instance_methods returns Symbols, so the warning fired spuriously.
  warn "Language##{name} was not defined" unless method_defined?(name)
end
end end
end end

View File

@@ -9,12 +9,30 @@ module Linguist
# Languages are defined in `lib/linguist/languages.yml`. # Languages are defined in `lib/linguist/languages.yml`.
class Language class Language
@languages = [] @languages = []
@overrides = {}
@index = {} @index = {}
@name_index = {} @name_index = {}
@alias_index = {} @alias_index = {}
@extension_index = {} @extension_index = {}
@filename_index = {} @filename_index = {}
# Valid Languages types
TYPES = [:markup, :programming]
# Internal: Check whether an extension is registered as an override,
# i.e. is shared by more than one Language.
#
# extension - The extension to look up among the override keys.
#
# Returns true or false.
def self.ambiguous?(extension)
  @overrides.key?(extension)
end
# Internal: List every extension registered as an override.
#
# Returns an Array of the override table's keys.
def self.overridden_extensions
  @overrides.map { |extension, _language| extension }
end
# Internal: Create a new Language object # Internal: Create a new Language object
# #
# attributes - A hash of attributes # attributes - A hash of attributes
@@ -47,18 +65,22 @@ module Linguist
warn "Extension is missing a '.': #{extension.inspect}" warn "Extension is missing a '.': #{extension.inspect}"
end end
# All Language extensions should be unique. Warn if there is a unless ambiguous?(extension)
# duplicate.
if @extension_index.key?(extension)
warn "Duplicate extension: #{extension}"
end
# Index the extension with a leading ".": ".rb" # Index the extension with a leading ".": ".rb"
@extension_index[extension] = language @extension_index[extension] = language
# Index the extension without a leading ".": "rb" # Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language @extension_index[extension.sub(/^\./, '')] = language
end end
end
language.overrides.each do |extension|
if extension !~ /^\./
warn "Extension is missing a '.': #{extension.inspect}"
end
@overrides[extension] = language
end
language.filenames.each do |filename| language.filenames.each do |filename|
@filename_index[filename] = language @filename_index[filename] = language
@@ -179,6 +201,12 @@ module Linguist
# @name is required # @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name") @name = attributes[:name] || raise(ArgumentError, "missing name")
# Set type
@type = attributes[:type] ? attributes[:type].to_sym : nil
if @type && !TYPES.include?(@type)
raise ArgumentError, "invalid type: #{@type}"
end
# Set aliases # Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || []) @aliases = [default_alias_name] + (attributes[:aliases] || [])
@@ -191,19 +219,15 @@ module Linguist
# Set extensions or default to []. # Set extensions or default to [].
@extensions = attributes[:extensions] || [] @extensions = attributes[:extensions] || []
@overrides = attributes[:overrides] || []
@filenames = attributes[:filenames] || [] @filenames = attributes[:filenames] || []
# Set popular, major, and searchable flags # Set popular, and searchable flags
@popular = attributes.key?(:popular) ? attributes[:popular] : false @popular = attributes.key?(:popular) ? attributes[:popular] : false
@major = attributes.key?(:major) ? attributes[:major] : false
@searchable = attributes.key?(:searchable) ? attributes[:searchable] : true @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true
# If group name is set, save the name so we can lazy load it later # If group name is set, save the name so we can lazy load it later
if attributes[:group_name] if attributes[:group_name]
if major?
warn "#{name} is a major language, it should not be grouped with #{attributes[:group_name]}"
end
@group = nil @group = nil
@group_name = attributes[:group_name] @group_name = attributes[:group_name]
@@ -211,7 +235,6 @@ module Linguist
else else
@group = self @group = self
end end
end end
# Public: Get proper name # Public: Get proper name
@@ -225,6 +248,11 @@ module Linguist
# Returns the name String # Returns the name String
attr_reader :name attr_reader :name
# Public: Get type.
#
# Returns a type Symbol or nil.
attr_reader :type
# Public: Get aliases # Public: Get aliases
# #
# Examples # Examples
@@ -260,6 +288,11 @@ module Linguist
# Returns the extensions Array # Returns the extensions Array
attr_reader :extensions attr_reader :extensions
# Internal: Get overridden extensions.
#
# Returns the extensions Array.
attr_reader :overrides
# Public: Get filenames # Public: Get filenames
# #
# Examples # Examples
@@ -278,12 +311,6 @@ module Linguist
# Public: Get Language group # Public: Get Language group
# #
# Minor languages maybe grouped with major languages for
# accounting purposes. For an example, JSP files are grouped as
# Java.
#
# For major languages, group should always return self.
#
# Returns a Language # Returns a Language
def group def group
@group ||= Language.find_by_name(@group_name) @group ||= Language.find_by_name(@group_name)
@@ -303,26 +330,6 @@ module Linguist
!popular? !popular?
end end
# Public: Is it major language?
#
# Major languages should be actual programming
# languages. Configuration formats should be excluded.
#
# Returns true or false
def major?
@major
end
# Public: Is it a minor language?
#
# Minor language include variants of major languages and
# markup languages like HTML and YAML.
#
# Returns true or false
def minor?
!major?
end
# Public: Is it searchable? # Public: Is it searchable?
# #
# Unsearchable languages won't be indexed by solr and won't show # Unsearchable languages won't be indexed by solr and won't show
@@ -375,14 +382,15 @@ module Linguist
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Language.create( Language.create(
:name => name, :name => name,
:type => options['type'],
:aliases => options['aliases'], :aliases => options['aliases'],
:lexer => options['lexer'], :lexer => options['lexer'],
:group_name => options['group'], :group_name => options['group'],
:searchable => options.key?('searchable') ? options['searchable'] : true, :searchable => options.key?('searchable') ? options['searchable'] : true,
:search_term => options['search_term'], :search_term => options['search_term'],
:extensions => options['extensions'], :extensions => options['extensions'],
:overrides => options['overrides'],
:filenames => options['filenames'], :filenames => options['filenames'],
:major => options['major'],
:popular => popular.include?(name) :popular => popular.include?(name)
) )
end end

View File

@@ -5,12 +5,12 @@
# lexer exists in lexers.yml. This is a list of lexers available in our # lexer exists in lexers.yml. This is a list of lexers available in our
# version of pygments. # version of pygments.
# #
# type - Either programming, markup, or nil
# lexer - An explicit lexer String (defaults to name.downcase) # lexer - An explicit lexer String (defaults to name.downcase)
# aliases - An Array of additional aliases (implicitly # aliases - An Array of additional aliases (implicitly
# includes name.downcase) # includes name.downcase)
# extensions - An Array of associated extensions # extensions - An Array of associated extensions
# major - Boolean flag major programming languages. Please leave # overrides - An Array of extensions that takes precedence over conflicts
# this option to GitHub staff to decide.
# searchable - Boolean flag to enable searching (defaults to true) # searchable - Boolean flag to enable searching (defaults to true)
# search_term - Deprecated: Some languages may be indexed under a # search_term - Deprecated: Some languages may be indexed under a
# different alias. Avoid defining new exceptions. # different alias. Avoid defining new exceptions.
@@ -21,7 +21,7 @@
# Please keep this list alphabetized. # Please keep this list alphabetized.
ASP: ASP:
major: true type: programming
lexer: aspx-vb lexer: aspx-vb
search_term: aspx-vb search_term: aspx-vb
aliases: aliases:
@@ -37,7 +37,7 @@ ASP:
- .asp - .asp
ActionScript: ActionScript:
major: true type: programming
lexer: ActionScript 3 lexer: ActionScript 3
search_term: as3 search_term: as3
aliases: aliases:
@@ -46,7 +46,7 @@ ActionScript:
- .as - .as
Ada: Ada:
major: true type: programming
extensions: extensions:
- .adb - .adb
- .ads - .ads
@@ -57,13 +57,13 @@ AppleScript:
- .applescript - .applescript
Arc: Arc:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .arc - .arc
Assembly: Assembly:
major: true type: programming
lexer: NASM lexer: NASM
search_term: nasm search_term: nasm
aliases: aliases:
@@ -72,6 +72,7 @@ Assembly:
- .asm - .asm
Batchfile: Batchfile:
type: programming
group: Shell group: Shell
search_term: bat search_term: bat
aliases: aliases:
@@ -89,7 +90,7 @@ BlitzMax:
- .bmx - .bmx
Boo: Boo:
major: true type: programming
extensions: extensions:
- .boo - .boo
@@ -99,13 +100,15 @@ Brainfuck:
- .bf - .bf
C: C:
major: true type: programming
overrides:
- .h
extensions: extensions:
- .c - .c
- .h - .h
C#: C#:
major: true type: programming
search_term: csharp search_term: csharp
aliases: aliases:
- csharp - csharp
@@ -113,7 +116,7 @@ C#:
- .cs - .cs
C++: C++:
major: true type: programming
search_term: cpp search_term: cpp
aliases: aliases:
- cpp - cpp
@@ -123,6 +126,7 @@ C++:
- .cpp - .cpp
- .cu - .cu
- .cxx - .cxx
- .h
- .h++ - .h++
- .hh - .hh
- .hpp - .hpp
@@ -144,19 +148,19 @@ ChucK:
- .ck - .ck
Clojure: Clojure:
major: true type: programming
extensions: extensions:
- .clj - .clj
CoffeeScript: CoffeeScript:
major: true type: programming
extensions: extensions:
- .coffee - .coffee
filenames: filenames:
- Cakefile - Cakefile
ColdFusion: ColdFusion:
major: true type: programming
lexer: Coldfusion HTML lexer: Coldfusion HTML
search_term: cfm search_term: cfm
aliases: aliases:
@@ -166,7 +170,7 @@ ColdFusion:
- .cfc - .cfc
Common Lisp: Common Lisp:
major: true type: programming
aliases: aliases:
- lisp - lisp
extensions: extensions:
@@ -186,6 +190,7 @@ Cucumber:
- .feature - .feature
Cython: Cython:
type: programming
group: Python group: Python
extensions: extensions:
- .pyx - .pyx
@@ -193,7 +198,7 @@ Cython:
- .pxi - .pxi
D: D:
major: true type: programming
extensions: extensions:
- .d - .d
- .di - .di
@@ -212,7 +217,7 @@ Darcs Patch:
- .dpatch - .dpatch
Delphi: Delphi:
major: true type: programming
extensions: extensions:
- .pas - .pas
@@ -222,18 +227,18 @@ Diff:
- .patch - .patch
Dylan: Dylan:
major: true type: programming
extensions: extensions:
- .dylan - .dylan
Eiffel: Eiffel:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .e - .e
Emacs Lisp: Emacs Lisp:
major: true type: programming
lexer: Scheme lexer: Scheme
aliases: aliases:
- elisp - elisp
@@ -242,13 +247,13 @@ Emacs Lisp:
- .emacs - .emacs
Erlang: Erlang:
major: true type: programming
extensions: extensions:
- .hrl - .hrl
- .erl - .erl
F#: F#:
major: true type: programming
lexer: OCaml lexer: OCaml
search_term: ocaml search_term: ocaml
extensions: extensions:
@@ -257,7 +262,7 @@ F#:
- .fsx - .fsx
FORTRAN: FORTRAN:
major: true type: programming
lexer: Fortran lexer: Fortran
extensions: extensions:
- .f - .f
@@ -266,17 +271,18 @@ FORTRAN:
- .F90 - .F90
Factor: Factor:
major: true type: programming
extensions: extensions:
- .factor - .factor
Fancy: Fancy:
major: true type: programming
extensions: extensions:
- .fy - .fy
- .fancypack - .fancypack
GAS: GAS:
type: programming
group: Assembly group: Assembly
extensions: extensions:
- .s - .s
@@ -308,7 +314,7 @@ Gettext Catalog:
- .pot - .pot
Go: Go:
major: true type: programming
extensions: extensions:
- .go - .go
@@ -324,13 +330,14 @@ Groff:
- '.7' - '.7'
Groovy: Groovy:
major: true type: programming
lexer: Java lexer: Java
extensions: extensions:
- .gradle - .gradle
- .groovy - .groovy
HTML: HTML:
type: markup
extensions: extensions:
- .html - .html
- .xhtml - .xhtml
@@ -338,12 +345,14 @@ HTML:
- .xslt - .xslt
HTML+Django: HTML+Django:
type: markup
group: HTML group: HTML
lexer: HTML+Django/Jinja lexer: HTML+Django/Jinja
extensions: extensions:
- .mustache - .mustache
HTML+ERB: HTML+ERB:
type: markup
group: HTML group: HTML
lexer: RHTML lexer: RHTML
extensions: extensions:
@@ -351,12 +360,13 @@ HTML+ERB:
- .html.erb - .html.erb
HTML+PHP: HTML+PHP:
type: markup
group: HTML group: HTML
extensions: extensions:
- .phtml - .phtml
HaXe: HaXe:
major: true type: programming
lexer: haXe lexer: haXe
extensions: extensions:
- .hx - .hx
@@ -364,11 +374,12 @@ HaXe:
- .mtt - .mtt
Haml: Haml:
type: markup
extensions: extensions:
- .haml - .haml
Haskell: Haskell:
major: true type: programming
extensions: extensions:
- .hs - .hs
- .hsc - .hsc
@@ -390,7 +401,7 @@ IRC log:
- .weechatlog - .weechatlog
Io: Io:
major: true type: programming
extensions: extensions:
- .io - .io
@@ -402,7 +413,7 @@ JSON:
- .json - .json
Java: Java:
major: true type: programming
extensions: extensions:
- .java - .java
- .pde - .pde
@@ -417,7 +428,7 @@ Java Server Pages:
- .jsp - .jsp
JavaScript: JavaScript:
major: true type: programming
aliases: aliases:
- js - js
- node - node
@@ -443,6 +454,7 @@ LilyPond:
- .ily - .ily
Literate Haskell: Literate Haskell:
type: programming
group: Haskell group: Haskell
search_term: lhs search_term: lhs
aliases: aliases:
@@ -451,7 +463,7 @@ Literate Haskell:
- .lhs - .lhs
Lua: Lua:
major: true type: programming
extensions: extensions:
- .lua - .lua
- .nse - .nse
@@ -467,6 +479,7 @@ Mako:
- .mao - .mao
Markdown: Markdown:
type: markup
lexer: Text only lexer: Text only
extensions: extensions:
- .md - .md
@@ -477,10 +490,11 @@ Markdown:
Matlab: Matlab:
extensions: extensions:
- .m
- .matlab - .matlab
Max/MSP: Max/MSP:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .mxt - .mxt
@@ -489,7 +503,7 @@ MiniD: # Legacy
searchable: false searchable: false
Mirah: Mirah:
major: true type: programming
lexer: Ruby lexer: Ruby
search_term: ruby search_term: ruby
extensions: extensions:
@@ -511,7 +525,7 @@ Nimrod:
- .nim - .nim
Nu: Nu:
major: true type: programming
lexer: Scheme lexer: Scheme
aliases: aliases:
- nush - nush
@@ -528,7 +542,7 @@ NumPy:
- .numpyw - .numpyw
OCaml: OCaml:
major: true type: programming
extensions: extensions:
- .ml - .ml
- .mly - .mly
@@ -541,25 +555,29 @@ ObjDump:
- .objdump - .objdump
Objective-C: Objective-C:
major: true type: programming
overrides:
- .m
extensions: extensions:
- .h
- .m - .m
- .mm - .mm
Objective-J: Objective-J:
major: true type: programming
extensions: extensions:
- .j - .j
- .sj - .sj
OpenCL: OpenCL:
type: programming
group: C group: C
lexer: C lexer: C
extensions: extensions:
- .cl - .cl
PHP: PHP:
major: true type: programming
extensions: extensions:
- .php - .php
- .aw - .aw
@@ -579,7 +597,9 @@ Parrot Internal Representation:
- .pasm - .pasm
Perl: Perl:
major: true type: programming
overrides:
- .pl
extensions: extensions:
- .pl - .pl
- .ph - .ph
@@ -590,14 +610,21 @@ Perl:
- .perl - .perl
- .psgi - .psgi
Prolog:
type: programming
extensions:
- .pl
- .pro
- .prolog
Pure Data: Pure Data:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .pd - .pd
Python: Python:
major: true type: programming
extensions: extensions:
- .py - .py
- .pyw - .pyw
@@ -612,19 +639,22 @@ Python traceback:
- .pytb - .pytb
R: R:
major: true type: programming
lexer: S lexer: S
overrides:
- .r
extensions: extensions:
- .r - .r
- .R - .R
RHTML: RHTML:
type: markup
group: HTML group: HTML
extensions: extensions:
- .rhtml - .rhtml
Racket: Racket:
major: true type: programming
lexer: Scheme lexer: Scheme
extensions: extensions:
- .rkt - .rkt
@@ -642,16 +672,17 @@ Raw token data:
Rebol: Rebol:
lexer: REBOL lexer: REBOL
extensions: extensions:
- .rebol - .r
- .r2 - .r2
- .r3 - .r3
- .rebol
Redcode: Redcode:
extensions: extensions:
- .cw - .cw
Ruby: Ruby:
major: true type: programming
aliases: aliases:
- jruby - jruby
- macruby - macruby
@@ -686,13 +717,13 @@ Sass:
- .sass - .sass
Scala: Scala:
major: true type: programming
extensions: extensions:
- .sbt - .sbt
- .scala - .scala
Scheme: Scheme:
major: true type: programming
extensions: extensions:
- .sls - .sls
- .ss - .ss
@@ -700,13 +731,13 @@ Scheme:
- .scm - .scm
Self: Self:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .self - .self
Shell: Shell:
major: true type: programming
lexer: Bash lexer: Bash
search_term: bash search_term: bash
aliases: aliases:
@@ -716,6 +747,7 @@ Shell:
extensions: extensions:
- .bash - .bash
- .sh - .sh
- .zsh
filenames: filenames:
- .bash_profile - .bash_profile
- .bashrc - .bashrc
@@ -725,7 +757,7 @@ Shell:
- .zshrc - .zshrc
Smalltalk: Smalltalk:
major: true type: programming
extensions: extensions:
- .st - .st
@@ -742,17 +774,18 @@ Standard ML:
- .sml - .sml
SuperCollider: SuperCollider:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .sc - .sc
Tcl: Tcl:
major: true type: programming
extensions: extensions:
- .tcl - .tcl
Tcsh: Tcsh:
type: programming
group: Shell group: Shell
extensions: extensions:
- .tcsh - .tcsh
@@ -772,30 +805,31 @@ Text:
- .txt - .txt
Textile: Textile:
type: markup
lexer: Text only lexer: Text only
extensions: extensions:
- .textile - .textile
VHDL: VHDL:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .vhdl - .vhdl
- .vhd - .vhd
Vala: Vala:
major: true type: programming
extensions: extensions:
- .vala - .vala
Verilog: Verilog:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .v - .v
VimL: VimL:
major: true type: programming
search_term: vim search_term: vim
aliases: aliases:
- vim - vim
@@ -806,7 +840,7 @@ VimL:
- .gvimrc - .gvimrc
Visual Basic: Visual Basic:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .bas - .bas
@@ -816,6 +850,7 @@ Visual Basic:
- .vb - .vb
XML: XML:
type: markup
extensions: extensions:
- .xml - .xml
- .rss - .rss
@@ -828,7 +863,7 @@ XML:
- .rdf - .rdf
XQuery: XQuery:
major: true type: programming
extensions: extensions:
- .xq - .xq
- .xqm - .xqm
@@ -841,6 +876,7 @@ XS:
- .xs - .xs
YAML: YAML:
type: markup
extensions: extensions:
- .yml - .yml
- .yaml - .yaml
@@ -853,12 +889,13 @@ mupad:
- .mu - .mu
ooc: ooc:
major: true type: programming
lexer: Ooc lexer: Ooc
extensions: extensions:
- .ooc - .ooc
reStructuredText: reStructuredText:
type: markup
search_term: rst search_term: rst
aliases: aliases:
- rst - rst

View File

@@ -141,7 +141,7 @@ module Linguist
# #
# Returns html String # Returns html String
def colorize(text) def colorize(text)
Albino.colorize(text, self) Albino.new(text, self).colorize(:O => 'stripnl=false')
end end
# Public: Highlight syntax of text without the outer highlight div # Public: Highlight syntax of text without the outer highlight div

View File

@@ -70,12 +70,9 @@ module Linguist
# Skip vendored or generated blobs # Skip vendored or generated blobs
next if blob.vendored? || blob.generated? || blob.language.nil? next if blob.vendored? || blob.generated? || blob.language.nil?
# Get language group # Only include programming languages
language = blob.language.group if blob.language.type == :programming
@sizes[blob.language.group] += blob.size
# Only include major languages
if language.major?
@sizes[language] += blob.size
end end
end end

140
test/fixtures/drupal.module vendored Normal file
View File

@@ -0,0 +1,140 @@
<?php
/**
* @file
* Additional filter for PHP input.
*/
/**
* Implements hook_help().
*/
function php_help($path, $arg) {
switch ($path) {
case 'admin/help#php':
$output = '';
$output .= '<h3>' . t('About') . '</h3>';
$output .= '<p>' . t('The PHP filter module adds a PHP filter to your site, for use with <a href="@filter">text formats</a>. This filter adds the ability to execute PHP code in any text field that uses a text format (such as the body of a content item or the text of a comment). <a href="@php-net">PHP</a> is a general-purpose scripting language widely-used for web development, and is the language with which Drupal has been developed. For more information, see the online handbook entry for the <a href="@php">PHP filter module</a>.', array('@filter' => url('admin/help/filter'), '@php-net' => 'http://www.php.net', '@php' => 'http://drupal.org/handbook/modules/php/')) . '</p>';
$output .= '<h3>' . t('Uses') . '</h3>';
$output .= '<dl>';
$output .= '<dt>' . t('Enabling execution of PHP in text fields') . '</dt>';
$output .= '<dd>' . t('The PHP filter module allows users with the proper permissions to include custom PHP code that will get executed when pages of your site are processed. While this is a powerful and flexible feature if used by a trusted user with PHP experience, it is a significant and dangerous security risk in the hands of a malicious or inexperienced user. Even a trusted user may accidentally compromise the site by entering malformed or incorrect PHP code. Only the most trusted users should be granted permission to use the PHP filter, and all PHP code added through the PHP filter should be carefully examined before use. <a href="@php-snippets">Example PHP snippets</a> can be found on Drupal.org.', array('@php-snippets' => url('http://drupal.org/handbook/customization/php-snippets'))) . '</dd>';
$output .= '</dl>';
return $output;
}
}
/**
* Implements hook_permission().
*/
function php_permission() {
return array(
'use PHP for settings' => array(
'title' => t('Use PHP for settings'),
'restrict access' => TRUE,
),
);
}
/**
* Evaluate a string of PHP code.
*
* This is a wrapper around PHP's eval(). It uses output buffering to capture both
* returned and printed text. Unlike eval(), we require code to be surrounded by
* <?php ?> tags; in other words, we evaluate the code as if it were a stand-alone
* PHP file.
*
* Using this wrapper also ensures that the PHP code which is evaluated can not
* overwrite any variables in the calling code, unlike a regular eval() call.
*
* @param $code
* The code to evaluate.
* @return
* A string containing the printed output of the code, followed by the returned
* output of the code.
*
* @ingroup php_wrappers
*/
function php_eval($code) {
global $theme_path, $theme_info, $conf;
// Store current theme path.
$old_theme_path = $theme_path;
// Restore theme_path to the theme, as long as php_eval() executes,
// so code evaluated will not see the caller module as the current theme.
// If theme info is not initialized get the path from theme_default.
if (!isset($theme_info)) {
$theme_path = drupal_get_path('theme', $conf['theme_default']);
}
else {
$theme_path = dirname($theme_info->filename);
}
ob_start();
print eval('?>' . $code);
$output = ob_get_contents();
ob_end_clean();
// Recover original theme path.
$theme_path = $old_theme_path;
return $output;
}
/**
* Tips callback for php filter.
*/
function _php_filter_tips($filter, $format, $long = FALSE) {
global $base_url;
if ($long) {
$output = '<h4>' . t('Using custom PHP code') . '</h4>';
$output .= '<p>' . t('Custom PHP code may be embedded in some types of site content, including posts and blocks. While embedding PHP code inside a post or block is a powerful and flexible feature when used by a trusted user with PHP experience, it is a significant and dangerous security risk when used improperly. Even a small mistake when posting PHP code may accidentally compromise your site.') . '</p>';
$output .= '<p>' . t('If you are unfamiliar with PHP, SQL, or Drupal, avoid using custom PHP code within posts. Experimenting with PHP may corrupt your database, render your site inoperable, or significantly compromise security.') . '</p>';
$output .= '<p>' . t('Notes:') . '</p>';
$output .= '<ul><li>' . t('Remember to double-check each line for syntax and logic errors <strong>before</strong> saving.') . '</li>';
$output .= '<li>' . t('Statements must be correctly terminated with semicolons.') . '</li>';
$output .= '<li>' . t('Global variables used within your PHP code retain their values after your script executes.') . '</li>';
$output .= '<li>' . t('<code>register_globals</code> is <strong>turned off</strong>. If you need to use forms, understand and use the functions in <a href="@formapi">the Drupal Form API</a>.', array('@formapi' => url('http://api.drupal.org/api/group/form_api/7'))) . '</li>';
$output .= '<li>' . t('Use a <code>print</code> or <code>return</code> statement in your code to output content.') . '</li>';
$output .= '<li>' . t('Develop and test your PHP code using a separate test script and sample database before deploying on a production site.') . '</li>';
$output .= '<li>' . t('Consider including your custom PHP code within a site-specific module or <code>template.php</code> file rather than embedding it directly into a post or block.') . '</li>';
$output .= '<li>' . t('Be aware that the ability to embed PHP code within content is provided by the PHP Filter module. If this module is disabled or deleted, then blocks and posts with embedded PHP may display, rather than execute, the PHP code.') . '</li></ul>';
$output .= '<p>' . t('A basic example: <em>Creating a "Welcome" block that greets visitors with a simple message.</em>') . '</p>';
$output .= '<ul><li>' . t('<p>Add a custom block to your site, named "Welcome" . With its text format set to "PHP code" (or another format supporting PHP input), add the following in the Block body:</p>
<pre>
print t(\'Welcome visitor! Thank you for visiting.\');
</pre>') . '</li>';
$output .= '<li>' . t('<p>To display the name of a registered user, use this instead:</p>
<pre>
global $user;
if ($user->uid) {
print t(\'Welcome @name! Thank you for visiting.\', array(\'@name\' => format_username($user)));
}
else {
print t(\'Welcome visitor! Thank you for visiting.\');
}
</pre>') . '</li></ul>';
$output .= '<p>' . t('<a href="@drupal">Drupal.org</a> offers <a href="@php-snippets">some example PHP snippets</a>, or you can create your own with some PHP experience and knowledge of the Drupal system.', array('@drupal' => url('http://drupal.org'), '@php-snippets' => url('http://drupal.org/handbook/customization/php-snippets'))) . '</p>';
return $output;
}
else {
return t('You may post PHP code. You should include &lt;?php ?&gt; tags.');
}
}
/**
* Implements hook_filter_info().
*
* Provide PHP code filter. Use with care.
*/
function php_filter_info() {
$filters['php_code'] = array(
'title' => t('PHP evaluator'),
'description' => t('Executes a piece of PHP code. The usage of this filter should be restricted to administrators only!'),
'process callback' => 'php_eval',
'tips callback' => '_php_filter_tips',
'cache' => FALSE,
);
return $filters;
}

9
test/fixtures/matlab_function.m vendored Normal file
View File

@@ -0,0 +1,9 @@
function ret = matlab_function(A,B)
% MATLAB_FUNCTION  Add two values and print the result.
%   ret = matlab_function(A,B) returns the sum of A and B and also
%   writes a label followed by that sum to the command window.
ret = plus(A, B);
% Show the caller what is about to be returned
disp('Return value in function');
disp(ret);

12
test/fixtures/matlab_script.m vendored Normal file
View File

@@ -0,0 +1,12 @@
% Matlab example script
% Defines two operands, adds them via matlab_function and prints the result.
% matlab_function is a separate function file which resides in the same
% directory as this script.
value1 = 5 % semicolon at end of line is not mandatory, only suppresses output to command line.
value2 = 3
% Calculate sum of value1 and value2 (trailing semicolon: the assignment itself is not echoed)
result = matlab_function(value1,value2);
% Print a marker line, then the computed sum
disp('called from script')
disp(result);

2
test/fixtures/test-perl.pl vendored Normal file
View File

@@ -0,0 +1,2 @@
#!/usr/bin/perl
# Perl file with a shebang line: prints a greeting followed by a newline.
print("Hello, world!\n");

3
test/fixtures/test-perl2.pl vendored Normal file
View File

@@ -0,0 +1,3 @@
# Perl file without shebang
# Prints a greeting followed by a newline.
print("Hello, world!\n");

12
test/fixtures/test-prolog.pl vendored Normal file
View File

@@ -0,0 +1,12 @@
/* Prolog test file: a small family database with one derived relation. */

/* male(P) and female(P) record the sex of person P. */
male(john).
male(peter).
female(vick).
female(christie).

/* parents(Child, P1, P2): the two parents of Child.
   Presumably P1 is the father and P2 the mother, going by the variable
   names F and M used in brother/2 below. */
parents(john, peter, christie).
parents(vick, peter, christie).

/* X is a brother of Y: X is male, X and Y have the same parents, and X and
   Y are different people. The inequality is tested last, once both X and Y
   are bound; without it every male with recorded parents would be reported
   as his own brother (e.g. brother(john, john)). */
brother(X, Y) :- male(X), parents(X, F, M), parents(Y, F, M), X \= Y.

View File

@@ -230,6 +230,17 @@ class TestBlob < Test::Unit::TestCase
assert_equal Language['Ruby'], blob("wrong_shebang.rb").language assert_equal Language['Ruby'], blob("wrong_shebang.rb").language
assert_nil blob("octocat.png").language assert_nil blob("octocat.png").language
# .pl disambiguation
assert_equal Language['Prolog'], blob("test-prolog.pl").language
assert_equal Language['Perl'], blob("test-perl.pl").language
assert_equal Language['Perl'], blob("test-perl2.pl").language
# .m disambiguation
assert_equal Language['Objective-C'], blob("Foo.m").language
assert_equal Language['Objective-C'], blob("hello.m").language
assert_equal Language['Matlab'], blob("matlab_function.m").language
assert_equal Language['Matlab'], blob("matlab_script.m").language
# .r disambiguation # .r disambiguation
assert_equal Language['R'], blob("hello-r.R").language assert_equal Language['R'], blob("hello-r.R").language
assert_equal Language['Rebol'], blob("hello-rebol.r").language assert_equal Language['Rebol'], blob("hello-rebol.r").language
@@ -275,6 +286,9 @@ class TestBlob < Test::Unit::TestCase
# http://docs.racket-lang.org/scribble/ # http://docs.racket-lang.org/scribble/
assert_equal Language['Racket'], blob("scribble.scrbl").language assert_equal Language['Racket'], blob("scribble.scrbl").language
# https://github.com/drupal/drupal/blob/7.x/modules/php/php.module
assert_equal Language['PHP'], blob("drupal.module").language
end end
def test_lexer def test_lexer

View File

@@ -5,6 +5,20 @@ require 'test/unit'
class TestLanguage < Test::Unit::TestCase class TestLanguage < Test::Unit::TestCase
include Linguist include Linguist
# Each listed extension must be reported as ambiguous, while
# find_by_extension still resolves it to the expected language.
def test_ambiguous_extensions
  [
    %w[h C],
    %w[m Objective-C],
    %w[pl Perl],
    %w[r R]
  ].each do |extension, language_name|
    assert Language.ambiguous?(".#{extension}")
    assert_equal Language[language_name], Language.find_by_extension(extension)
  end
end
def test_lexer def test_lexer
# Add an assertion to this list if you add/change any lexers # Add an assertion to this list if you add/change any lexers
# in languages.yml. Please keep this list alphabetized. # in languages.yml. Please keep this list alphabetized.
@@ -81,6 +95,7 @@ class TestLanguage < Test::Unit::TestCase
assert_equal Lexer['Ooc'], Language['ooc'].lexer assert_equal Lexer['Ooc'], Language['ooc'].lexer
assert_equal Lexer['PHP'], Language['PHP'].lexer assert_equal Lexer['PHP'], Language['PHP'].lexer
assert_equal Lexer['Perl'], Language['Perl'].lexer assert_equal Lexer['Perl'], Language['Perl'].lexer
assert_equal Lexer['Prolog'], Language['Prolog'].lexer
assert_equal Lexer['Python Traceback'], Language['Python traceback'].lexer assert_equal Lexer['Python Traceback'], Language['Python traceback'].lexer
assert_equal Lexer['Python'], Language['Python'].lexer assert_equal Lexer['Python'], Language['Python'].lexer
assert_equal Lexer['REBOL'], Language['Rebol'].lexer assert_equal Lexer['REBOL'], Language['Rebol'].lexer
@@ -198,23 +213,18 @@ class TestLanguage < Test::Unit::TestCase
assert_equal Language['reStructuredText'], Language.find_by_alias('rst') assert_equal Language['reStructuredText'], Language.find_by_alias('rst')
end end
# Every language that reports itself as major must be its own group.
def test_major_groups
  Language.all.each do |candidate|
    next unless candidate.major?

    assert_equal candidate, candidate.group
  end
end
def test_groups def test_groups
assert_equal Language['Assembly'], Language['GAS'].group assert_equal Language['Assembly'], Language['GAS'].group
assert_equal Language['C'], Language['OpenCL'].group assert_equal Language['C'], Language['OpenCL'].group
assert_equal Language['Haskell'], Language['Literate Haskell'].group assert_equal Language['Haskell'], Language['Literate Haskell'].group
assert_equal Language['Java'], Language['Java Server Pages'].group assert_equal Language['Java'], Language['Java Server Pages'].group
assert_equal Language['JavaScript'], Language['JSON'].group assert_equal Language['JavaScript'], Language['JSON'].group
assert_equal Language['Perl'], Language['Perl'].group
assert_equal Language['Python'], Language['Cython'].group assert_equal Language['Python'], Language['Cython'].group
assert_equal Language['Python'], Language['NumPy'].group assert_equal Language['Python'], Language['NumPy'].group
assert_equal Language['Python'], Language['Python traceback'].group assert_equal Language['Python'], Language['Python traceback'].group
assert_equal Language['Python'], Language['Python'].group
assert_equal Language['Ruby'], Language['Ruby'].group
assert_equal Language['Shell'], Language['Batchfile'].group assert_equal Language['Shell'], Language['Batchfile'].group
assert_equal Language['Shell'], Language['Gentoo Ebuild'].group assert_equal Language['Shell'], Language['Gentoo Ebuild'].group
assert_equal Language['Shell'], Language['Gentoo Eclass'].group assert_equal Language['Shell'], Language['Gentoo Eclass'].group
@@ -270,71 +280,21 @@ class TestLanguage < Test::Unit::TestCase
assert Language['Brainfuck'].unpopular? assert Language['Brainfuck'].unpopular?
end end
def test_major def test_programming
# Add an assertion to this list if you add/change any major assert_equal :programming, Language['JavaScript'].type
# settings in languages.yml. Please keep this list alphabetized. assert_equal :programming, Language['Perl'].type
assert Language['ASP'].major? assert_equal :programming, Language['Python'].type
assert Language['ActionScript'].major? assert_equal :programming, Language['Ruby'].type
assert Language['Ada'].major?
assert Language['Arc'].major?
assert Language['Assembly'].major?
assert Language['Boo'].major?
assert Language['C#'].major?
assert Language['C'].major?
assert Language['C++'].major?
assert Language['Clojure'].major?
assert Language['CoffeeScript'].major?
assert Language['ColdFusion'].major?
assert Language['Common Lisp'].major?
assert Language['D'].major?
assert Language['Delphi'].major?
assert Language['Dylan'].major?
assert Language['Eiffel'].major?
assert Language['Emacs Lisp'].major?
assert Language['Erlang'].major?
assert Language['F#'].major?
assert Language['FORTRAN'].major?
assert Language['Factor'].major?
assert Language['Go'].major?
assert Language['Groovy'].major?
assert Language['HaXe'].major?
assert Language['Haskell'].major?
assert Language['Io'].major?
assert Language['Java'].major?
assert Language['JavaScript'].major?
assert Language['Lua'].major?
assert Language['Max/MSP'].major?
assert Language['Nu'].major?
assert Language['OCaml'].major?
assert Language['Objective-C'].major?
assert Language['Objective-J'].major?
assert Language['PHP'].major?
assert Language['Perl'].major?
assert Language['Pure Data'].major?
assert Language['Python'].major?
assert Language['R'].major?
assert Language['Racket'].major?
assert Language['Ruby'].major?
assert Language['Scala'].major?
assert Language['Scheme'].major?
assert Language['Self'].major?
assert Language['Smalltalk'].major?
assert Language['SuperCollider'].major?
assert Language['Tcl'].major?
assert Language['VHDL'].major?
assert Language['Vala'].major?
assert Language['Verilog'].major?
assert Language['VimL'].major?
assert Language['Visual Basic'].major?
assert Language['XQuery'].major?
assert Language['ooc'].major?
end end
def test_minor def test_markup
assert Language['Brainfuck'].minor? assert_equal :markup, Language['HTML'].type
assert Language['HTML'].minor? assert_equal :markup, Language['YAML'].type
assert Language['Makefile'].minor? end
assert Language['YAML'].minor?
def test_other
assert_nil Language['Brainfuck'].type
assert_nil Language['Makefile'].type
end end
def test_searchable def test_searchable
@@ -381,10 +341,12 @@ class TestLanguage < Test::Unit::TestCase
def test_find_all_by_extension def test_find_all_by_extension
Language.all.each do |language| Language.all.each do |language|
language.extensions.each do |extension| language.extensions.each do |extension|
unless Language.ambiguous?(extension)
assert_equal language, Language.find_by_extension(extension) assert_equal language, Language.find_by_extension(extension)
end end
end end
end end
end
def test_find_by_filename def test_find_by_filename
assert_equal Language['Ruby'], Language.find_by_filename('foo.rb') assert_equal Language['Ruby'], Language.find_by_filename('foo.rb')
@@ -457,6 +419,16 @@ Hello
assert_equal <<-HTML, Language['Ruby'].colorize_without_wrapper("def foo\n 'foo'\nend\n") assert_equal <<-HTML, Language['Ruby'].colorize_without_wrapper("def foo\n 'foo'\nend\n")
<span class="k">def</span> <span class="nf">foo</span> <span class="k">def</span> <span class="nf">foo</span>
<span class="s1">&#39;foo&#39;</span> <span class="s1">&#39;foo&#39;</span>
<span class="k">end</span>
HTML
end
def test_colorize_doesnt_strip_newlines
assert_equal <<-HTML, Language['Ruby'].colorize_without_wrapper("\n\n# Foo\ndef 'foo'\nend\n")
<span class="c1"># Foo</span>
<span class="k">def</span> <span class="s1">&#39;foo&#39;</span>
<span class="k">end</span> <span class="k">end</span>
HTML HTML
end end