Merge branch 'master' into 1036-local

Conflicts:
	lib/linguist/heuristics.rb
	lib/linguist/samples.json
This commit is contained in:
Arfon Smith
2014-10-23 12:05:18 +01:00
85 changed files with 59180 additions and 73601 deletions

View File

@@ -321,6 +321,11 @@ module Linguist
language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end
# Internal: Get the TextMate compatible scope for the blob
def tm_scope
  # Short-circuit: when no language was detected, hand back that falsy
  # value instead of calling tm_scope on it.
  detected = language
  detected && detected.tm_scope
end
# Public: Highlight syntax of blob
#
# options - A Hash of options (defaults to {})

View File

@@ -63,6 +63,7 @@ module Linguist
generated_jni_header? ||
composer_lock? ||
node_modules? ||
godeps? ||
vcr_cassette? ||
generated_by_zephir?
end
@@ -231,6 +232,14 @@ module Linguist
!!name.match(/node_modules\//)
end
# Internal: Is the blob inside a Godeps/ directory? Such files are
# not meant for human review in pull requests.
#
# Returns true or false.
def godeps?
  # Collapse the MatchData-or-nil result into a strict true/false.
  name.match(/Godeps\//) ? true : false
end
# Internal: Is the blob a generated php composer lock file?
#
# Returns true or false.

View File

@@ -14,27 +14,25 @@ module Linguist
def self.find_by_heuristics(data, languages)
if active?
if languages.all? { |l| ["Objective-C", "C++", "C"].include?(l) }
disambiguate_c(data, languages)
result = disambiguate_c(data, languages)
end
if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
disambiguate_pl(data, languages)
result = disambiguate_pl(data, languages)
end
if languages.all? { |l| ["ECL", "Prolog"].include?(l) }
disambiguate_ecl(data, languages)
result = disambiguate_ecl(data, languages)
end
if languages.all? { |l| ["TypeScript", "XML"].include?(l) }
disambiguate_ts(data, languages)
if languages.all? { |l| ["IDL", "Prolog"].include?(l) }
result = disambiguate_pro(data, languages)
end
if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) }
disambiguate_cl(data, languages)
end
if languages.all? { |l| ["Rebol", "R"].include?(l) }
disambiguate_r(data, languages)
result = disambiguate_cl(data, languages)
end
return result
end
end
# .h extensions are ambigious between C, C++, and Objective-C.
# .h extensions are ambiguous between C, C++, and Objective-C.
# We want to shortcut look for Objective-C _and_ now C++ too!
#
# Returns an array of Languages or []
@@ -64,6 +62,16 @@ module Linguist
matches
end
# Internal: Choose between Prolog and IDL for an ambiguous blob.
#
# data      - Blob contents to scan.
# languages - Candidate language names (not consulted by this heuristic).
#
# Returns a one-element Array: Language["Prolog"] when data contains ":-",
# Language["IDL"] otherwise.
def self.disambiguate_pro(data, languages)
  winner = data.include?(":-") ? "Prolog" : "IDL"
  [Language[winner]]
end
def self.disambiguate_ts(data, languages)
matches = []
if (data.include?("</translation>"))

View File

@@ -135,8 +135,8 @@ module Linguist
# No shebang. Still more work to do. Try to find it with our heuristics.
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first
# Lastly, fall back to the probablistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names ).first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[classified[0]]
end
@@ -290,6 +290,16 @@ module Linguist
@lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
raise(ArgumentError, "#{@name} is missing lexer")
@tm_scope = attributes[:tm_scope] || begin
context = case @type
when :data, :markup, :prose
'text'
when :programming, nil
'source'
end
"#{context}.#{@name.downcase}"
end
@ace_mode = attributes[:ace_mode]
@wrap = attributes[:wrap] || false
@@ -363,6 +373,11 @@ module Linguist
# Returns the Lexer
attr_reader :lexer
# Public: Get the name of a TextMate-compatible scope
#
# Returns the scope
attr_reader :tm_scope
# Public: Get Ace mode
#
# Examples
@@ -510,9 +525,9 @@ module Linguist
end
end
extensions = Samples::DATA['extnames']
interpreters = Samples::DATA['interpreters']
filenames = Samples::DATA['filenames']
extensions = Samples.cache['extnames']
interpreters = Samples.cache['interpreters']
filenames = Samples.cache['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
languages_yml = File.expand_path("../languages.yml", __FILE__)
@@ -564,6 +579,7 @@ module Linguist
:type => options['type'],
:aliases => options['aliases'],
:lexer => options['lexer'],
:tm_scope => options['tm_scope'],
:ace_mode => options['ace_mode'],
:wrap => options['wrap'],
:group_name => options['group'],

View File

@@ -83,6 +83,7 @@ ATS:
ActionScript:
type: programming
lexer: ActionScript 3
tm_scope: source.actionscript.3
color: "#e3491a"
search_term: as3
aliases:
@@ -119,7 +120,7 @@ ApacheConf:
Apex:
type: programming
lexer: Text only
lexer: Java
extensions:
- .cls
@@ -173,6 +174,7 @@ Assembly:
- nasm
extensions:
- .asm
- .a51
Augeas:
type: programming
@@ -284,8 +286,9 @@ C:
C#:
type: programming
ace_mode: csharp
tm_scope: source.cs
search_term: csharp
color: "#5a25a2"
color: "#178600"
aliases:
- csharp
extensions:
@@ -411,6 +414,7 @@ Clojure:
CoffeeScript:
type: programming
tm_scope: source.coffee
ace_mode: coffee
color: "#244776"
aliases:
@@ -453,6 +457,7 @@ ColdFusion CFC:
Common Lisp:
type: programming
tm_scope: source.lisp
color: "#3fb68b"
aliases:
- lisp
@@ -648,6 +653,7 @@ Elm:
Emacs Lisp:
type: programming
lexer: Common Lisp
tm_scope: source.lisp
color: "#c065db"
aliases:
- elisp
@@ -748,6 +754,7 @@ Forth:
- .fth
- .4th
- .forth
- .frt
Frege:
type: programming
@@ -756,6 +763,14 @@ Frege:
extensions:
- .fr
G-code:
type: data
lexer: Text only
extensions:
- .g
- .gco
- .gcode
Game Maker Language:
type: programming
color: "#8ad353"
@@ -785,6 +800,12 @@ GAS:
- .s
- .S
GDScript:
type: programming
lexer: Text only
extensions:
- .gd
GLSL:
group: C
type: programming
@@ -877,6 +898,12 @@ Grammatical Framework:
searchable: true
color: "#ff0000"
Graph Modeling Language:
type: data
lexer: Text only
extensions:
- .gml
Groff:
extensions:
- .man
@@ -911,6 +938,7 @@ Groovy Server Pages:
HTML:
type: markup
tm_scope: text.html.basic
ace_mode: html
aliases:
- xhtml
@@ -922,6 +950,7 @@ HTML:
HTML+Django:
type: markup
tm_scope: text.html.django
group: HTML
lexer: HTML+Django/Jinja
extensions:
@@ -930,6 +959,7 @@ HTML+Django:
HTML+ERB:
type: markup
tm_scope: text.html.ruby
group: HTML
lexer: RHTML
aliases:
@@ -940,6 +970,7 @@ HTML+ERB:
HTML+PHP:
type: markup
tm_scope: text.html.php
group: HTML
extensions:
- .phtml
@@ -959,6 +990,8 @@ Haml:
Handlebars:
type: markup
lexer: Handlebars
aliases:
- hbs
extensions:
- .handlebars
- .hbs
@@ -1075,6 +1108,7 @@ J:
JSON:
type: data
tm_scope: source.json
group: JavaScript
ace_mode: json
searchable: false
@@ -1137,6 +1171,7 @@ Java Server Pages:
JavaScript:
type: programming
tm_scope: source.js
ace_mode: javascript
color: "#f1e05a"
aliases:
@@ -1149,6 +1184,7 @@ JavaScript:
- .es6
- .frag
- .jake
- .jsb
- .jsfl
- .jsm
- .jss
@@ -1202,7 +1238,17 @@ LFE:
LLVM:
extensions:
- .ll
LSL:
type: programming
lexer: LSL
ace_mode: lsl
extensions:
- .lsl
interpreters:
- lsl
color: '#3d9970'
LabVIEW:
type: programming
lexer: Text only
@@ -1254,6 +1300,7 @@ Literate Agda:
Literate CoffeeScript:
type: programming
tm_scope: source.litcoffee
group: CoffeeScript
lexer: Text only
ace_mode: markdown
@@ -1310,6 +1357,7 @@ Lua:
color: "#fa1fa1"
extensions:
- .lua
- .fcgi
- .nse
- .pd_lua
- .rbxs
@@ -1537,6 +1585,7 @@ ObjDump:
Objective-C:
type: programming
tm_scope: source.objc
color: "#438eff"
aliases:
- obj-c
@@ -1547,6 +1596,7 @@ Objective-C:
Objective-C++:
type: programming
tm_scope: source.objc++
color: "#4886FC"
aliases:
- obj-c++
@@ -1637,12 +1687,14 @@ PAWN:
PHP:
type: programming
tm_scope: text.html.php
ace_mode: php
color: "#4F5D95"
extensions:
- .php
- .aw
- .ctp
- .fcgi
- .module
- .php3
- .php4
@@ -1694,6 +1746,7 @@ Pascal:
- .dfm
- .dpr
- .lpr
- .pp
Perl:
type: programming
@@ -1782,11 +1835,13 @@ Processing:
Prolog:
type: programming
lexer: Logtalk
color: "#74283c"
extensions:
- .prolog
- .ecl
- .pl
- .ecl
- .pro
- .prolog
Propeller Spin:
type: programming
@@ -1831,6 +1886,8 @@ Python:
color: "#3581ba"
extensions:
- .py
- .cgi
- .fcgi
- .gyp
- .lmi
- .pyde
@@ -1992,6 +2049,7 @@ Ruby:
extensions:
- .rb
- .builder
- .fcgi
- .gemspec
- .god
- .irbrc
@@ -2039,6 +2097,7 @@ SAS:
SCSS:
type: markup
tm_scope: source.scss
group: CSS
ace_mode: scss
extensions:
@@ -2054,6 +2113,7 @@ SQF:
SQL:
type: data
tm_scope: source.sql
ace_mode: sql
extensions:
- .sql
@@ -2078,6 +2138,7 @@ Sage:
Sass:
type: markup
tm_scope: source.sass
group: CSS
extensions:
- .sass
@@ -2140,6 +2201,8 @@ Shell:
- .sh
- .bash
- .bats
- .cgi
- .fcgi
- .tmux
- .zsh
interpreters:
@@ -2271,6 +2334,9 @@ Tcl:
- .tcl
- .adp
- .tm
interpreters:
- tclsh
- wish
Tcsh:
type: programming
@@ -2402,6 +2468,7 @@ VimL:
- .vim
filenames:
- .vimrc
- _vimrc
- vimrc
- gvimrc
@@ -2551,6 +2618,7 @@ Xtend:
YAML:
type: data
tm_scope: source.yaml
aliases:
- yml
extensions:

View File

@@ -1,8 +1,13 @@
require 'linguist/blob_helper'
require 'linguist/language'
require 'rugged'
module Linguist
class LazyBlob
GIT_ATTR = ['linguist-language', 'linguist-vendored']
GIT_ATTR_OPTS = { :priority => [:index], :skip_system => true }
GIT_ATTR_FLAGS = Rugged::Repository::Attributes.parse_opts(GIT_ATTR_OPTS)
include BlobHelper
MAX_SIZE = 128 * 1024
@@ -19,6 +24,29 @@ module Linguist
@mode = mode
end
# Internal: Fetch the GIT_ATTR attributes for this blob's name from the
# repository. The first truthy result is memoized in @git_attributes.
#
# Returns whatever repository.fetch_attributes returns.
def git_attributes
  return @git_attributes if @git_attributes

  @git_attributes = repository.fetch_attributes(name, GIT_ATTR, GIT_ATTR_FLAGS)
end
# Public: Is the blob vendored?
#
# A truthy 'linguist-vendored' git attribute takes precedence and is
# interpreted by boolean_attribute; otherwise the inherited check decides.
#
# NOTE(review): a nil or false attribute value falls through to super —
# confirm that is the intended handling for an explicitly unset attribute.
#
# Returns the result of boolean_attribute or of super.
def vendored?
  override = git_attributes['linguist-vendored']
  return super unless override

  boolean_attribute(override)
end
# Public: Detect the language of the blob, honoring a 'linguist-language'
# git attribute when one is set.
#
# The result (including nil) is memoized in @language; defined? is used so
# a memoized nil is not recomputed.
#
# Returns Language.find_by_name of the attribute value when the attribute
# is truthy, otherwise the result of super.
def language
  return @language if defined?(@language)

  override = git_attributes['linguist-language']
  @language = override ? Language.find_by_name(override) : super
end
def data
load_blob!
@data
@@ -30,6 +58,12 @@ module Linguist
end
protected
# Returns true if the attribute is present and not the string "false".
def boolean_attribute(attr)
  # Only the literal string "false" switches the attribute off.
  return false if attr == "false"

  true
end
# Internal: Populate @data and @size from the repository, reading through
# Rugged::Blob.to_buffer with MAX_SIZE as its limit argument. Does nothing
# once @data is non-nil.
#
# Returns the value from Rugged::Blob.to_buffer on first load, nil afterwards.
def load_blob!
  return unless @data.nil?

  @data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE)
end

View File

@@ -110,18 +110,37 @@ module Linguist
if @old_commit_oid == @commit_oid
@old_stats
else
compute_stats(@old_commit_oid, @commit_oid, @old_stats)
compute_stats(@old_commit_oid, @old_stats)
end
end
end
protected
def compute_stats(old_commit_oid, commit_oid, cache = nil)
file_map = cache ? cache.dup : {}
old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
# Internal: Build a fresh Rugged::Index from current_tree and install it as
# the repository's index.
#
# NOTE(review): presumably done so git-attribute lookups resolve against the
# analysed commit's tree — confirm against the attribute lookup options.
#
# Returns the new index.
def read_index
  repository.index = Rugged::Index.new.tap do |fresh_index|
    fresh_index.read_tree(current_tree)
  end
end
diff = Rugged::Tree.diff(repository, old_tree, new_tree)
# Internal: The tree of the commit identified by @commit_oid, looked up
# through Rugged::Commit.lookup and memoized in @tree.
#
# Returns the commit's tree.
def current_tree
  return @tree if @tree

  @tree = Rugged::Commit.lookup(repository, @commit_oid).tree
end
protected
def compute_stats(old_commit_oid, cache = nil)
old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
read_index
diff = Rugged::Tree.diff(repository, old_tree, current_tree)
# Clear file map and fetch full diff if any .gitattributes files are changed
if cache && diff.each_delta.any? { |delta| File.basename(delta.new_file[:path]) == ".gitattributes" }
diff = Rugged::Tree.diff(repository, old_tree = nil, current_tree)
file_map = {}
else
file_map = cache ? cache.dup : {}
end
diff.each_delta do |delta|
old = delta.old_file[:path]

File diff suppressed because it is too large Load Diff

View File

@@ -17,9 +17,11 @@ module Linguist
PATH = File.expand_path('../samples.json', __FILE__)
# Hash of serialized samples object
if File.exist?(PATH)
serializer = defined?(JSON) ? JSON : YAML
DATA = serializer.load(File.read(PATH))
# Public: Lazily load the serialized samples stored at PATH.
#
# The file is parsed with JSON when that constant is defined, otherwise
# with YAML, and the parsed object is memoized in @cache.
#
# Returns the deserialized samples object.
def self.cache
  return @cache if @cache

  parser = defined?(JSON) ? JSON : YAML
  @cache = parser.load(File.read(PATH))
end
# Public: Iterate over each sample.

View File

@@ -33,6 +33,9 @@
# Erlang bundles
- ^rebar$
# Go dependencies
- Godeps/_workspace/
# Bootstrap minified css and js
- (^|/)bootstrap([^.]*)(\.min)?\.(js|css)$
@@ -235,3 +238,7 @@
- octicons.css
- octicons.min.css
- sprockets-octicons.scss
# Typesafe Activator
- (^|/)activator$
- (^|/)activator\.bat$

View File

@@ -1,3 +1,3 @@
module Linguist
VERSION = "3.1.2"
VERSION = "3.4.0"
end