Merge branch 'master' into revert-2014-revert-1976-path-for-fileblob

This commit is contained in:
Arfon Smith
2015-03-13 11:42:24 -07:00
91 changed files with 6629 additions and 101 deletions

View File

@@ -6,3 +6,15 @@ require 'linguist/repository'
require 'linguist/samples'
require 'linguist/shebang'
require 'linguist/version'
class << Linguist
  # Public: Get/set the instrumentation backend. When set, it is sent
  # `instrument(*args, &block)`; when left unset (nil), instrumentation
  # calls simply run their block.
  attr_accessor :instrumenter

  # Internal: Report an event through the configured instrumenter.
  #
  # args - Arguments forwarded unchanged to `instrumenter.instrument`.
  # bk   - Optional block wrapping the work being measured.
  #
  # Returns whatever the instrumenter returns when one is configured,
  # otherwise the block's result, or nil when no block was given.
  def instrument(*args, &bk)
    return instrumenter.instrument(*args, &bk) if instrumenter

    bk.call if bk
  end
end

View File

@@ -74,7 +74,7 @@ module Linguist
#
# Returns an Array
def extensions
basename, *segments = name.split(".")
basename, *segments = name.downcase.split(".")
segments.map.with_index do |segment, index|
"." + segments[index..-1].join(".")

View File

@@ -33,7 +33,7 @@ module Linguist
# disambiguate "Perl", "Prolog" do |data|
# if data.include?("use strict")
# Language["Perl"]
# elsif data.include?(":-")
# elsif /^[^#]+:-/.match(data)
# Language["Prolog"]
# end
# end
@@ -94,23 +94,27 @@ module Linguist
Language["Perl6"]
elsif data.match(/use strict|use\s+v?5\./)
Language["Perl"]
elsif data.include?(":-")
elsif /^[^#]+:-/.match(data)
Language["Prolog"]
end
end
disambiguate "ECL", "Prolog" do |data|
if data.include?(":-")
if /^[^#]+:-/.match(data)
Language["Prolog"]
elsif data.include?(":=")
Language["ECL"]
end
end
disambiguate "IDL", "Prolog" do |data|
if data.include?(":-")
disambiguate "IDL", "Prolog", "INI", "QMake" do |data|
if /^[^#]+:-/.match(data)
Language["Prolog"]
else
elsif data.include?("last_client=")
Language["INI"]
elsif data.include?("HEADERS") && data.include?("SOURCES")
Language["QMake"]
elsif /^\s*function[ \w,]+$/.match(data)
Language["IDL"]
end
end
@@ -178,11 +182,13 @@ module Linguist
end
end
disambiguate "M", "Mathematica", "Matlab", "Mercury", "Objective-C" do |data|
disambiguate "M", "MUF", "Mathematica", "Matlab", "Mercury", "Objective-C" do |data|
if ObjectiveCRegex.match(data)
Language["Objective-C"]
elsif data.include?(":- module")
Language["Mercury"]
elsif /^: /.match(data)
Language["MUF"]
elsif /^\s*;/.match(data)
Language["M"]
elsif /^\s*\(\*/.match(data)
@@ -229,5 +235,31 @@ module Linguist
Language["Text"]
end
end
# Choose between the SQL dialects listed below. The vendor-specific checks run
# in a fixed order (Postgres, then IBM DB2, then Oracle); plain SQL is only
# chosen when none of them matched and no procedural keyword is present.
# Yields nil when nothing can be decided.
disambiguate "PLSQL", "SQLPL", "PLpgSQL", "SQL" do |data|
  # Postgres
  postgres_patterns = [
    /^\\i\b|AS \$\$|LANGUAGE '+plpgsql'+/i,
    /SECURITY (DEFINER|INVOKER)/i,
    /BEGIN( WORK| TRANSACTION)?;/i
  ]
  # IBM DB2
  db2_patterns = [
    /(alter module)|(language sql)|(begin( NOT)+ atomic)/i,
    /signal SQLSTATE '[0-9]+'/i
  ]
  # Oracle
  oracle_patterns = [
    /pragma|\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i,
    /constructor\W+function/i
  ]
  # Keywords whose presence rules out the generic SQL fallback.
  procedural_keywords = /begin|boolean|package|exception/i

  if postgres_patterns.any? { |pattern| pattern.match(data) }
    Language["PLpgSQL"]
  elsif db2_patterns.any? { |pattern| pattern.match(data) }
    Language["SQLPL"]
  elsif oracle_patterns.any? { |pattern| pattern.match(data) }
    Language["PLSQL"]
  elsif procedural_keywords.match(data).nil?
    # Generic SQL
    Language["SQL"]
  end
end
# Choose between D, DTrace and Makefile. The first pattern that matches wins;
# yields nil when none of them match.
disambiguate "D", "DTrace", "Makefile" do |data|
  case data
  when /^module /
    # A line starting with "module ".
    Language["D"]
  when /^((dtrace:::)?BEGIN|provider |#pragma (D (option|attributes)|ident)\s)/
    # A BEGIN probe, a provider declaration or a D/ident pragma at line start.
    Language["DTrace"]
  when /(\/.*:( .* \\)$| : \\$|^ : |: \\$)/
    # Colon-separated rule lines, including backslash line continuations.
    Language["Makefile"]
  end
end
end
end

View File

@@ -73,7 +73,7 @@ module Linguist
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end
@extension_index[extension] << language
@extension_index[extension.downcase] << language
end
language.interpreters.each do |interpreter|
@@ -105,19 +105,31 @@ module Linguist
# Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty?
# Call each strategy until one candidate is returned.
STRATEGIES.reduce([]) do |languages, strategy|
candidates = strategy.call(blob, languages)
if candidates.size == 1
return candidates.first
elsif candidates.size > 1
# More than one candidate was found, pass them to the next strategy.
candidates
else
# No candiates were found, pass on languages from the previous strategy.
languages
Linguist.instrument("linguist.detection", :blob => blob) do
# Call each strategy until one candidate is returned.
languages = []
returning_strategy = nil
STRATEGIES.each do |strategy|
returning_strategy = strategy
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
strategy.call(blob, languages)
end
if candidates.size == 1
languages = candidates
break
elsif candidates.size > 1
# More than one candidate was found, pass them to the next strategy.
languages = candidates
else
# No candidates, try the next strategy
end
end
end.first
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
languages.first
end
end
# Public: Get all Languages
@@ -191,7 +203,7 @@ module Linguist
# Returns all matching Languages or [] if none were found.
def self.find_by_extension(extname)
extname = ".#{extname}" unless extname.start_with?(".")
@extension_index[extname]
@extension_index[extname.downcase]
end
# DEPRECATED
@@ -528,8 +540,8 @@ module Linguist
if extnames = extensions[name]
extnames.each do |extname|
if !options['extensions'].index { |x| x.end_with? extname }
warn "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml" unless extname == '.script!'
if !options['extensions'].index { |x| x.downcase.end_with? extname.downcase }
warn "#{name} has a sample with extension (#{extname.downcase}) that isn't explicitly defined in languages.yml" unless extname == '.script!'
options['extensions'] << extname
end
end

View File

@@ -41,6 +41,14 @@ AGS Script:
tm_scope: source.c++
ace_mode: c_cpp
AMPL:
type: programming
color: "#00008B"
extensions:
- .ampl
tm_scope: source.ampl
ace_mode: text
ANTLR:
type: programming
color: "#9DC3FF"
@@ -204,7 +212,6 @@ Assembly:
- nasm
extensions:
- .asm
- .ASM
- .a51
- .nasm
tm_scope: source.asm.x86
@@ -271,6 +278,7 @@ Batchfile:
ace_mode: batchfile
Befunge:
type: programming
extensions:
- .befunge
ace_mode: text
@@ -326,6 +334,7 @@ Boo:
ace_mode: text
Brainfuck:
type: programming
extensions:
- .b
- .bf
@@ -347,11 +356,9 @@ Bro:
C:
type: programming
color: "#555"
color: "#555555"
extensions:
- .c
- .C
- .H
- .cats
- .h
- .idc
@@ -421,6 +428,7 @@ CLIPS:
ace_mode: text
CMake:
type: programming
extensions:
- .cmake
- .cmake.in
@@ -432,8 +440,6 @@ COBOL:
type: programming
extensions:
- .cob
- .COB
- .CPY
- .cbl
- .ccp
- .cobol
@@ -480,6 +486,7 @@ Chapel:
ace_mode: text
ChucK:
type: programming
extensions:
- .ck
tm_scope: source.java
@@ -641,6 +648,7 @@ Crystal:
- crystal
Cucumber:
type: programming
extensions:
- .feature
tm_scope: text.gherkin.feature
@@ -699,7 +707,17 @@ DM:
tm_scope: source.c++
ace_mode: c_cpp
DTrace:
type: programming
extensions:
- .d
interpreters:
- dtrace
tm_scope: source.c
ace_mode: c_cpp
Darcs Patch:
type: programming
search_term: dpatch
aliases:
- dpatch
@@ -717,6 +735,7 @@ Dart:
ace_mode: dart
Diff:
type: programming
extensions:
- .diff
- .patch
@@ -872,14 +891,6 @@ FORTRAN:
color: "#4d41b1"
extensions:
- .f90
- .F
- .F03
- .F08
- .F77
- .F90
- .F95
- .FOR
- .FPP
- .f
- .f03
- .f08
@@ -923,9 +934,7 @@ Forth:
color: "#341708"
extensions:
- .fth
- .4TH
- .4th
- .F
- .f
- .for
- .forth
@@ -974,7 +983,6 @@ GAS:
group: Assembly
extensions:
- .s
- .S
tm_scope: source.asm.x86
ace_mode: assembly_x86
@@ -986,7 +994,6 @@ GDScript:
ace_mode: text
GLSL:
group: C
type: programming
extensions:
- .glsl
@@ -1014,6 +1021,7 @@ Game Maker Language:
ace_mode: c_cpp
Genshi:
type: programming
extensions:
- .kid
tm_scope: text.xml.genshi
@@ -1023,6 +1031,7 @@ Genshi:
ace_mode: xml
Gentoo Ebuild:
type: programming
group: Shell
extensions:
- .ebuild
@@ -1030,6 +1039,7 @@ Gentoo Ebuild:
ace_mode: sh
Gentoo Eclass:
type: programming
group: Shell
extensions:
- .eclass
@@ -1037,6 +1047,7 @@ Gentoo Eclass:
ace_mode: sh
Gettext Catalog:
type: prose
search_term: pot
searchable: false
aliases:
@@ -1132,11 +1143,11 @@ Graphviz (DOT):
tm_scope: source.dot
extensions:
- .dot
- .DOT
- .gv
ace_mode: text
Groff:
type: programming
extensions:
- .man
- '.1'
@@ -1164,6 +1175,7 @@ Groovy:
- groovy
Groovy Server Pages:
type: programming
group: Groovy
aliases:
- gsp
@@ -1313,6 +1325,7 @@ INI:
- .ini
- .cfg
- .prefs
- .pro
- .properties
tm_scope: source.ini
aliases:
@@ -1320,6 +1333,7 @@ INI:
ace_mode: ini
IRC log:
type: data
search_term: irc
aliases:
- irc
@@ -1350,6 +1364,7 @@ Inform 7:
ace_mode: text
Inno Setup:
type: programming
extensions:
- .iss
tm_scope: source.inno
@@ -1445,6 +1460,7 @@ Java:
- .java
Java Server Pages:
type: programming
group: Java
search_term: jsp
aliases:
@@ -1540,6 +1556,7 @@ LFE:
ace_mode: lisp
LLVM:
type: programming
extensions:
- .ll
ace_mode: text
@@ -1591,6 +1608,13 @@ Latte:
tm_scope: source.smarty
ace_mode: smarty
Lean:
type: programming
extensions:
- .lean
- .hlean
ace_mode: lean
Less:
type: markup
group: CSS
@@ -1600,6 +1624,7 @@ Less:
ace_mode: less
LilyPond:
type: programming
extensions:
- .ly
- .ily
@@ -1720,6 +1745,15 @@ MTML:
tm_scope: text.html.basic
ace_mode: html
MUF:
type: programming
group: Forth
extensions:
- .muf
- .m
tm_scope: none
ace_mode: forth
Makefile:
type: programming
aliases:
@@ -1728,6 +1762,7 @@ Makefile:
- mf
extensions:
- .mak
- .d
- .mk
filenames:
- GNUmakefile
@@ -1738,6 +1773,7 @@ Makefile:
ace_mode: makefile
Mako:
type: programming
extensions:
- .mako
- .mao
@@ -1774,6 +1810,8 @@ Mathematica:
- .ma
- .nb
- .nbp
- .wl
- .wlt
aliases:
- mma
ace_mode: text
@@ -1830,6 +1868,7 @@ Mercury:
ace_mode: prolog
MiniD: # Legacy
type: programming
searchable: false
extensions:
- .minid # Dummy extension
@@ -1877,12 +1916,21 @@ MoonScript:
ace_mode: text
Myghty:
type: programming
extensions:
- .myt
tm_scope: none
ace_mode: text
NL:
type: data
extensions:
- .nl
tm_scope: none
ace_mode: text
NSIS:
type: programming
extensions:
- .nsi
- .nsh
@@ -1974,6 +2022,7 @@ Nu:
- nush
NumPy:
type: programming
group: Python
extensions:
- .numpy
@@ -2087,7 +2136,7 @@ OpenSCAD:
type: programming
extensions:
- .scad
tm_scope: none
tm_scope: source.scad
ace_mode: scad
Org:
@@ -2152,6 +2201,26 @@ PHP:
aliases:
- inc
#Oracle
PLSQL:
type: programming
ace_mode: sql
tm_scope: source.plsql.oracle
extensions:
- .pls
- .pkb
- .pks
- .plb
- .sql
#Postgres
PLpgSQL:
type: programming
ace_mode: pgsql
tm_scope: source.sql
extensions:
- .sql
Pan:
type: programming
color: '#cc0000'
@@ -2217,7 +2286,6 @@ Perl:
color: "#0298c3"
extensions:
- .pl
- .PL
- .cgi
- .fcgi
- .perl
@@ -2425,7 +2493,7 @@ Python traceback:
ace_mode: text
QML:
type: markup
type: programming
color: "#44a51c"
extensions:
- .qml
@@ -2433,6 +2501,7 @@ QML:
ace_mode: text
QMake:
type: programming
extensions:
- .pro
- .pri
@@ -2449,8 +2518,6 @@ R:
- splus
extensions:
- .r
- .R
- .Rd
- .rd
- .rsx
filenames:
@@ -2503,7 +2570,6 @@ RMarkdown:
ace_mode: markdown
extensions:
- .rmd
- .Rmd
tm_scope: none
Racket:
@@ -2529,6 +2595,7 @@ Ragel in Ruby Host:
ace_mode: text
Raw token data:
type: data
search_term: raw
aliases:
- raw
@@ -2560,6 +2627,7 @@ Red:
ace_mode: text
Redcode:
type: programming
extensions:
- .cw
tm_scope: none
@@ -2598,6 +2666,7 @@ Ruby:
- .gemspec
- .god
- .irbrc
- .jbuilder
- .mspec
- .pluginspec
- .podspec
@@ -2682,6 +2751,15 @@ SQL:
- .udf
- .viw
#IBM DB2
SQLPL:
type: programming
ace_mode: sql
tm_scope: source.sql
extensions:
- .sql
- .db2
STON:
type: data
group: Smalltalk
@@ -2846,6 +2924,7 @@ Smalltalk:
ace_mode: text
Smarty:
type: programming
extensions:
- .tpl
ace_mode: smarty
@@ -2857,6 +2936,7 @@ SourcePawn:
- sourcemod
extensions:
- .sp
- .sma
tm_scope: source.sp
ace_mode: text
@@ -3068,11 +3148,11 @@ UnrealScript:
VCL:
type: programming
ace_mode: perl
color: "#0298c3"
extensions:
- .vcl
tm_scope: source.perl
tm_scope: source.varnish.vcl
ace_mode: text
VHDL:
type: programming
@@ -3162,9 +3242,10 @@ WebIDL:
XC:
type: programming
color: "#99DA07"
extensions:
- .xc
tm_scope: source.c
tm_scope: source.xc
ace_mode: c_cpp
XML:
@@ -3214,11 +3295,11 @@ XML:
- .sublime-snippet
- .targets
- .tmCommand
- .tml
- .tmLanguage
- .tmPreferences
- .tmSnippet
- .tmTheme
- .tml
- .ts
- .ui
- .urdf
@@ -3269,6 +3350,7 @@ XQuery:
ace_mode: xquery
XS:
type: programming
extensions:
- .xs
tm_scope: source.c
@@ -3341,6 +3423,7 @@ desktop:
eC:
type: programming
color: "#4A4773"
search_term: ec
extensions:
- .ec
@@ -3365,6 +3448,7 @@ fish:
ace_mode: text
mupad:
type: programming
extensions:
- .mu
ace_mode: text

View File

@@ -9,21 +9,21 @@
- CSS
- Clojure
- CoffeeScript
- Common Lisp
- Diff
- Emacs Lisp
- Erlang
- Go
- HTML
- Haskell
- Java
- JavaScript
- Lua
- Matlab
- Objective-C
- PHP
- Perl
- Python
- R
- Ruby
- SQL
- Scala
- Scheme
- Shell
- Swift
- TeX
- VimL

View File

@@ -1,3 +1,3 @@
module Linguist
VERSION = "4.5.0b1"
VERSION = "4.5.0b2"
end