Merge branch 'master' of git://github.com/github/linguist into detect-sbt-as-scala-lang

This commit is contained in:
softprops
2011-07-05 22:21:29 -04:00
14 changed files with 478 additions and 195 deletions

View File

@@ -275,15 +275,15 @@ module Linguist
def guess_language
# Binary blobs are never guessed.
return if binary?
# Each strategy below is tried in order; the `||` chain stops at the
# first one that returns a truthy value, and the method returns that value.
# If it's a header file (.h) try to guess the language
header_language ||
# If it's a .r file, try to guess the language
r_language ||
# Disambiguate between multiple language extensions
disambiguate_extension_language ||
# See if there is a Language for the extension
pathname.language ||
# Look for idioms in first line
first_line_language ||
# Try to detect Language from shebang line
shebang_language
end
@@ -295,12 +295,22 @@ module Linguist
language ? language.lexer : Lexer['Text only']
end
# Internal: Disambiguates between multiple language extensions.
#
# Delegates to "guess_EXTENSION_language".
#
# Returns a Language or nil.
def disambiguate_extension_language
  # Only extensions claimed by more than one Language need a heuristic.
  return unless Language.ambiguous?(extname)

  # ".pl" dispatches to "guess_pl_language", ".h" to "guess_h_language", ...
  guesser = "guess_#{extname.sub(/^\./, '')}_language"
  return unless respond_to?(guesser)

  send(guesser)
end
# Internal: Guess language of header files (.h).
#
# Returns a Language.
def header_language
return unless extname == '.h'
def guess_h_language
if lines.grep(/^@(interface|property|private|public|end)/).any?
Language['Objective-C']
elsif lines.grep(/^class |^\s+(public|protected|private):/).any?
@@ -310,12 +320,58 @@ module Linguist
end
end
# Internal: Guess language of .m files.
#
# Objective-C heuristics:
# * Keywords
#
# Matlab heuristics:
# * Leading function keyword
# * "%" comments
#
# Returns a Language.
def guess_m_language
  # Any Objective-C keyword (or an #import at the start of a line) wins outright.
  return Language['Objective-C'] if lines.grep(/^#import|@(interface|implementation|property|synthesize|end)/).any?

  # Matlab evidence: a leading "function " on the first line, or any line
  # that starts with a "%" comment.
  looks_like_matlab = lines.first.to_s =~ /^function / || lines.grep(/^%/).any?

  # Without Matlab evidence fall back to Objective-C, to avoid Matlab
  # false positives.
  Language[looks_like_matlab ? 'Matlab' : 'Objective-C']
end
# Internal: Guess language of .pl files
#
# The rules for disambiguation are:
#
# 1. Many perl files begin with a shebang
# 2. Most Prolog source files have a rule somewhere (marked by the :- operator)
# 3. Default to Perl, because it is more popular
#
# Returns a Language.
def guess_pl_language
  # A perl shebang settles it immediately.
  return Language['Perl'] if shebang_script == 'perl'

  # A ":-" anywhere in the file is taken as a Prolog rule; otherwise
  # default to Perl.
  lines.grep(/:-/).any? ? Language['Prolog'] : Language['Perl']
end
# Internal: Guess language of .r files.
#
# Returns a Language.
def r_language
return unless extname == '.r'
def guess_r_language
if lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any?
Language['Rebol']
else
@@ -323,6 +379,20 @@ module Linguist
end
end
# Internal: Guess language from the first line.
#
# Look for leading "<?php"
#
# Returns a Language.
def first_line_language
  # Nothing to inspect when the blob isn't viewable.
  return unless viewable?

  # to_s covers an empty blob, where lines.first is nil.
  opening_line = lines.first.to_s
  Language['PHP'] if opening_line =~ /^<\?php/
end
# Internal: Extract the script name from the shebang line
#
# Requires Blob#data
@@ -399,5 +469,12 @@ module Linguist
return if !text? || large?
lexer.colorize_without_wrapper(data)
end
# Load-time sanity check: every overridden extension needs a matching
# "guess_EXTENSION_language" method, because that is the name
# disambiguate_extension_language dispatches to.
Language.overridden_extensions.each do |extension|
  name = "guess_#{extension.sub(/^\./, '')}_language"
  # method_defined? accepts a String or a Symbol. The previous
  # instance_methods.include?(name) lookup only matched on Rubies where
  # instance_methods returns Strings (1.8); on 1.9+ it returns Symbols, so
  # the String never matched and the warning fired for every extension.
  warn "Language##{name} was not defined" unless method_defined?(name)
end
end
end

View File

@@ -9,12 +9,30 @@ module Linguist
# Languages are defined in `lib/linguist/languages.yml`.
class Language
@languages = []
@overrides = {}
@index = {}
@name_index = {}
@alias_index = {}
@extension_index = {}
@filename_index = {}
# Valid Languages types
TYPES = [:markup, :programming]
# Internal: Test if extension maps to multiple Languages.
#
# Returns true or false.
def self.ambiguous?(extension)
  # @overrides is keyed by extension; membership means the extension
  # has been registered as an override.
  @overrides.key?(extension)
end
# Internal: Return overridden extensions.
#
# Returns extensions Array.
def self.overridden_extensions
  # The keys of @overrides are the extension strings; return them as a
  # fresh Array.
  @overrides.each_key.to_a
end
# Internal: Create a new Language object
#
# attributes - A hash of attributes
@@ -47,17 +65,21 @@ module Linguist
warn "Extension is missing a '.': #{extension.inspect}"
end
# All Language extensions should be unique. Warn if there is a
# duplicate.
if @extension_index.key?(extension)
warn "Duplicate extension: #{extension}"
unless ambiguous?(extension)
# Index the extension with a leading ".": ".rb"
@extension_index[extension] = language
# Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language
end
end
language.overrides.each do |extension|
if extension !~ /^\./
warn "Extension is missing a '.': #{extension.inspect}"
end
# Index the extension with a leading ".": ".rb"
@extension_index[extension] = language
# Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language
@overrides[extension] = language
end
language.filenames.each do |filename|
@@ -179,6 +201,12 @@ module Linguist
# @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name")
# Set type
@type = attributes[:type] ? attributes[:type].to_sym : nil
if @type && !TYPES.include?(@type)
raise ArgumentError, "invalid type: #{@type}"
end
# Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || [])
@@ -191,19 +219,15 @@ module Linguist
# Set extensions or default to [].
@extensions = attributes[:extensions] || []
@overrides = attributes[:overrides] || []
@filenames = attributes[:filenames] || []
# Set popular, major, and searchable flags
# Set popular, and searchable flags
@popular = attributes.key?(:popular) ? attributes[:popular] : false
@major = attributes.key?(:major) ? attributes[:major] : false
@searchable = attributes.key?(:searchable) ? attributes[:searchable] : true
# If group name is set, save the name so we can lazy load it later
if attributes[:group_name]
if major?
warn "#{name} is a major language, it should not be grouped with #{attributes[:group_name]}"
end
@group = nil
@group_name = attributes[:group_name]
@@ -211,7 +235,6 @@ module Linguist
else
@group = self
end
end
# Public: Get proper name
@@ -225,6 +248,11 @@ module Linguist
# Returns the name String
attr_reader :name
# Public: Get type.
#
# Returns a type Symbol or nil.
attr_reader :type
# Public: Get aliases
#
# Examples
@@ -260,6 +288,11 @@ module Linguist
# Returns the extensions Array
attr_reader :extensions
# Internal: Get overridden extensions.
#
# Returns the extensions Array.
attr_reader :overrides
# Public: Get filenames
#
# Examples
@@ -278,12 +311,6 @@ module Linguist
# Public: Get Language group
#
# Minor languages may be grouped with major languages for
# accounting purposes. For example, JSP files are grouped as
# Java.
#
# For major languages, group should always return self.
#
# Returns a Language
def group
@group ||= Language.find_by_name(@group_name)
@@ -303,26 +330,6 @@ module Linguist
!popular?
end
# Public: Is it major language?
#
# Major languages should be actual programming
# languages. Configuration formats should be excluded.
#
# Returns true or false
def major?
# Returns the stored @major flag as-is (no boolean coercion).
@major
end
# Public: Is it a minor language?
#
# Minor languages include variants of major languages and
# markup languages like HTML and YAML.
#
# Returns true or false
def minor?
# Strict negation of major?, so this always returns true or false.
!major?
end
# Public: Is it searchable?
#
# Unsearchable languages won't be indexed by solr and won't show
@@ -375,14 +382,15 @@ module Linguist
# Create one Language per entry in languages.yml (path resolved relative to
# this file). Each YAML key is the language name; its value is the options
# hash whose fields are passed through to Language.create below.
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Language.create(
:name => name,
:type => options['type'],
:aliases => options['aliases'],
:lexer => options['lexer'],
:group_name => options['group'],
# Searchable unless the entry explicitly sets the flag.
:searchable => options.key?('searchable') ? options['searchable'] : true,
:search_term => options['search_term'],
:extensions => options['extensions'],
:overrides => options['overrides'],
:filenames => options['filenames'],
:major => options['major'],
# NOTE(review): `popular` is defined outside this view — presumably a
# collection of popular language names; confirm.
:popular => popular.include?(name)
)
end

View File

@@ -5,12 +5,12 @@
# lexer exists in lexers.yml. This is a list of the lexers available in
# our version of pygments.
#
# type - Either programming, markup, or nil
# lexer - An explicit lexer String (defaults to name.downcase)
# aliases - An Array of additional aliases (implicitly
# includes name.downcase)
# extensions - An Array of associated extensions
# major - Boolean flag major programming languages. Please leave
# this option to GitHub staff to decide.
# overrides - An Array of extensions that takes precedence over conflicts
# searchable - Boolean flag to enable searching (defaults to true)
# search_term - Deprecated: Some languages may be indexed under a
# different alias. Avoid defining new exceptions.
@@ -21,7 +21,7 @@
# Please keep this list alphabetized.
ASP:
major: true
type: programming
lexer: aspx-vb
search_term: aspx-vb
aliases:
@@ -37,7 +37,7 @@ ASP:
- .asp
ActionScript:
major: true
type: programming
lexer: ActionScript 3
search_term: as3
aliases:
@@ -46,7 +46,7 @@ ActionScript:
- .as
Ada:
major: true
type: programming
extensions:
- .adb
- .ads
@@ -57,13 +57,13 @@ AppleScript:
- .applescript
Arc:
major: true
type: programming
lexer: Text only
extensions:
- .arc
Assembly:
major: true
type: programming
lexer: NASM
search_term: nasm
aliases:
@@ -72,6 +72,7 @@ Assembly:
- .asm
Batchfile:
type: programming
group: Shell
search_term: bat
aliases:
@@ -89,7 +90,7 @@ BlitzMax:
- .bmx
Boo:
major: true
type: programming
extensions:
- .boo
@@ -99,13 +100,15 @@ Brainfuck:
- .bf
C:
major: true
type: programming
overrides:
- .h
extensions:
- .c
- .h
C#:
major: true
type: programming
search_term: csharp
aliases:
- csharp
@@ -113,7 +116,7 @@ C#:
- .cs
C++:
major: true
type: programming
search_term: cpp
aliases:
- cpp
@@ -123,6 +126,7 @@ C++:
- .cpp
- .cu
- .cxx
- .h
- .h++
- .hh
- .hpp
@@ -144,19 +148,19 @@ ChucK:
- .ck
Clojure:
major: true
type: programming
extensions:
- .clj
CoffeeScript:
major: true
type: programming
extensions:
- .coffee
filenames:
- Cakefile
ColdFusion:
major: true
type: programming
lexer: Coldfusion HTML
search_term: cfm
aliases:
@@ -166,7 +170,7 @@ ColdFusion:
- .cfc
Common Lisp:
major: true
type: programming
aliases:
- lisp
extensions:
@@ -186,6 +190,7 @@ Cucumber:
- .feature
Cython:
type: programming
group: Python
extensions:
- .pyx
@@ -193,7 +198,7 @@ Cython:
- .pxi
D:
major: true
type: programming
extensions:
- .d
- .di
@@ -212,7 +217,7 @@ Darcs Patch:
- .dpatch
Delphi:
major: true
type: programming
extensions:
- .pas
@@ -222,18 +227,18 @@ Diff:
- .patch
Dylan:
major: true
type: programming
extensions:
- .dylan
Eiffel:
major: true
type: programming
lexer: Text only
extensions:
- .e
Emacs Lisp:
major: true
type: programming
lexer: Scheme
aliases:
- elisp
@@ -242,13 +247,13 @@ Emacs Lisp:
- .emacs
Erlang:
major: true
type: programming
extensions:
- .hrl
- .erl
F#:
major: true
type: programming
lexer: OCaml
search_term: ocaml
extensions:
@@ -257,7 +262,7 @@ F#:
- .fsx
FORTRAN:
major: true
type: programming
lexer: Fortran
extensions:
- .f
@@ -266,17 +271,18 @@ FORTRAN:
- .F90
Factor:
major: true
type: programming
extensions:
- .factor
Fancy:
major: true
type: programming
extensions:
- .fy
- .fancypack
GAS:
type: programming
group: Assembly
extensions:
- .s
@@ -308,7 +314,7 @@ Gettext Catalog:
- .pot
Go:
major: true
type: programming
extensions:
- .go
@@ -324,13 +330,14 @@ Groff:
- '.7'
Groovy:
major: true
type: programming
lexer: Java
extensions:
- .gradle
- .groovy
HTML:
type: markup
extensions:
- .html
- .xhtml
@@ -338,12 +345,14 @@ HTML:
- .xslt
HTML+Django:
type: markup
group: HTML
lexer: HTML+Django/Jinja
extensions:
- .mustache
HTML+ERB:
type: markup
group: HTML
lexer: RHTML
extensions:
@@ -351,12 +360,13 @@ HTML+ERB:
- .html.erb
HTML+PHP:
type: markup
group: HTML
extensions:
- .phtml
HaXe:
major: true
type: programming
lexer: haXe
extensions:
- .hx
@@ -364,11 +374,12 @@ HaXe:
- .mtt
Haml:
type: markup
extensions:
- .haml
Haskell:
major: true
type: programming
extensions:
- .hs
- .hsc
@@ -390,7 +401,7 @@ IRC log:
- .weechatlog
Io:
major: true
type: programming
extensions:
- .io
@@ -402,7 +413,7 @@ JSON:
- .json
Java:
major: true
type: programming
extensions:
- .java
- .pde
@@ -417,7 +428,7 @@ Java Server Pages:
- .jsp
JavaScript:
major: true
type: programming
aliases:
- js
- node
@@ -443,6 +454,7 @@ LilyPond:
- .ily
Literate Haskell:
type: programming
group: Haskell
search_term: lhs
aliases:
@@ -451,7 +463,7 @@ Literate Haskell:
- .lhs
Lua:
major: true
type: programming
extensions:
- .lua
- .nse
@@ -467,6 +479,7 @@ Mako:
- .mao
Markdown:
type: markup
lexer: Text only
extensions:
- .md
@@ -477,10 +490,11 @@ Markdown:
Matlab:
extensions:
- .m
- .matlab
Max/MSP:
major: true
type: programming
lexer: Text only
extensions:
- .mxt
@@ -489,7 +503,7 @@ MiniD: # Legacy
searchable: false
Mirah:
major: true
type: programming
lexer: Ruby
search_term: ruby
extensions:
@@ -511,7 +525,7 @@ Nimrod:
- .nim
Nu:
major: true
type: programming
lexer: Scheme
aliases:
- nush
@@ -528,7 +542,7 @@ NumPy:
- .numpyw
OCaml:
major: true
type: programming
extensions:
- .ml
- .mly
@@ -541,25 +555,29 @@ ObjDump:
- .objdump
Objective-C:
major: true
type: programming
overrides:
- .m
extensions:
- .h
- .m
- .mm
Objective-J:
major: true
type: programming
extensions:
- .j
- .sj
OpenCL:
type: programming
group: C
lexer: C
extensions:
- .cl
PHP:
major: true
type: programming
extensions:
- .php
- .aw
@@ -579,7 +597,9 @@ Parrot Internal Representation:
- .pasm
Perl:
major: true
type: programming
overrides:
- .pl
extensions:
- .pl
- .ph
@@ -590,14 +610,21 @@ Perl:
- .perl
- .psgi
Prolog:
type: programming
extensions:
- .pl
- .pro
- .prolog
Pure Data:
major: true
type: programming
lexer: Text only
extensions:
- .pd
Python:
major: true
type: programming
extensions:
- .py
- .pyw
@@ -612,19 +639,22 @@ Python traceback:
- .pytb
R:
major: true
type: programming
lexer: S
overrides:
- .r
extensions:
- .r
- .R
RHTML:
type: markup
group: HTML
extensions:
- .rhtml
Racket:
major: true
type: programming
lexer: Scheme
extensions:
- .rkt
@@ -642,16 +672,17 @@ Raw token data:
Rebol:
lexer: REBOL
extensions:
- .rebol
- .r
- .r2
- .r3
- .rebol
Redcode:
extensions:
- .cw
Ruby:
major: true
type: programming
aliases:
- jruby
- macruby
@@ -686,13 +717,13 @@ Sass:
- .sass
Scala:
major: true
type: programming
extensions:
- .sbt
- .scala
Scheme:
major: true
type: programming
extensions:
- .sls
- .ss
@@ -700,13 +731,13 @@ Scheme:
- .scm
Self:
major: true
type: programming
lexer: Text only
extensions:
- .self
Shell:
major: true
type: programming
lexer: Bash
search_term: bash
aliases:
@@ -716,6 +747,7 @@ Shell:
extensions:
- .bash
- .sh
- .zsh
filenames:
- .bash_profile
- .bashrc
@@ -725,7 +757,7 @@ Shell:
- .zshrc
Smalltalk:
major: true
type: programming
extensions:
- .st
@@ -742,17 +774,18 @@ Standard ML:
- .sml
SuperCollider:
major: true
type: programming
lexer: Text only
extensions:
- .sc
Tcl:
major: true
type: programming
extensions:
- .tcl
Tcsh:
type: programming
group: Shell
extensions:
- .tcsh
@@ -772,30 +805,31 @@ Text:
- .txt
Textile:
type: markup
lexer: Text only
extensions:
- .textile
VHDL:
major: true
type: programming
lexer: Text only
extensions:
- .vhdl
- .vhd
Vala:
major: true
type: programming
extensions:
- .vala
Verilog:
major: true
type: programming
lexer: Text only
extensions:
- .v
VimL:
major: true
type: programming
search_term: vim
aliases:
- vim
@@ -806,7 +840,7 @@ VimL:
- .gvimrc
Visual Basic:
major: true
type: programming
lexer: Text only
extensions:
- .bas
@@ -816,6 +850,7 @@ Visual Basic:
- .vb
XML:
type: markup
extensions:
- .xml
- .rss
@@ -828,7 +863,7 @@ XML:
- .rdf
XQuery:
major: true
type: programming
extensions:
- .xq
- .xqm
@@ -841,6 +876,7 @@ XS:
- .xs
YAML:
type: markup
extensions:
- .yml
- .yaml
@@ -853,12 +889,13 @@ mupad:
- .mu
ooc:
major: true
type: programming
lexer: Ooc
extensions:
- .ooc
reStructuredText:
type: markup
search_term: rst
aliases:
- rst

View File

@@ -141,7 +141,7 @@ module Linguist
#
# Returns html String
def colorize(text)
# NOTE(review): as written, the value of this first call is discarded; only
# the last expression below is returned. The two calls look like the
# before/after of the same change — confirm whether both are intended.
Albino.colorize(text, self)
# Passes 'stripnl=false' via the :O option — presumably a lexer option that
# stops leading/trailing newlines from being stripped; confirm against Albino.
Albino.new(text, self).colorize(:O => 'stripnl=false')
end
# Public: Highlight syntax of text without the outer highlight div

View File

@@ -70,12 +70,9 @@ module Linguist
# Skip vendored or generated blobs
next if blob.vendored? || blob.generated? || blob.language.nil?
# Get language group
language = blob.language.group
# Only include major languages
if language.major?
@sizes[language] += blob.size
# Only include programming languages
if blob.language.type == :programming
@sizes[blob.language.group] += blob.size
end
end