Merge branch 'master' into go-vendor

This commit is contained in:
Arfon Smith
2016-10-26 18:34:02 -04:00
719 changed files with 83506 additions and 9105 deletions

View File

@@ -1,5 +1,6 @@
require 'linguist/blob_helper'
require 'linguist/generated'
require 'linguist/grammars'
require 'linguist/heuristics'
require 'linguist/language'
require 'linguist/repository'
@@ -8,8 +9,85 @@ require 'linguist/shebang'
require 'linguist/version'
class << Linguist
# Public: Detects the Language of the blob.
#
# blob - an object that includes the Linguist `BlobHelper` interface;
# see Linguist::LazyBlob and Linguist::FileBlob for examples
#
# Returns Language or nil.
def detect(blob)
# Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty?
Linguist.instrument("linguist.detection", :blob => blob) do
# Call each strategy until one candidate is returned.
languages = []
returning_strategy = nil
STRATEGIES.each do |strategy|
returning_strategy = strategy
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
strategy.call(blob, languages)
end
if candidates.size == 1
languages = candidates
break
elsif candidates.size > 1
# More than one candidate was found, pass them to the next strategy.
languages = candidates
else
# No candidates, try the next strategy
end
end
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
languages.first
end
end
# Internal: The strategies used to detect the language of a file.
#
# A strategy is an object that has a `.call` method that takes two arguments:
#
# blob - An object that quacks like a blob.
# languages - An Array of candidate Language objects that were returned by the
#     previous strategy.
#
# A strategy should return an Array of Language candidates.
#
# Strategies are called in turn until a single Language is returned.
STRATEGIES = [
Linguist::Strategy::Modeline,
Linguist::Shebang,
Linguist::Strategy::Filename,
Linguist::Heuristics,
Linguist::Classifier
]
# Public: Set an instrumenter.
#
# class CustomInstrumenter
# def instrument(name, payload = {})
# warn "Instrumenting #{name}: #{payload[:blob]}"
# end
# end
#
# Linguist.instrumenter = CustomInstrumenter
#
# The instrumenter must conform to the `ActiveSupport::Notifications`
# interface, which defines `#instrument` and accepts:
#
# name - the String name of the event (e.g. "linguist.detected")
# payload - a Hash of the exception context.
attr_accessor :instrumenter
# Internal: Perform instrumentation on a block
#
# Linguist.instrument("linguist.dosomething", :blob => blob) do
# # logic to instrument here.
# end
#
def instrument(*args, &bk)
if instrumenter
instrumenter.instrument(*args, &bk)
@@ -17,4 +95,5 @@ class << Linguist
yield
end
end
end

View File

@@ -63,7 +63,7 @@ module Linguist
#
# Returns an Array
def extensions
basename, *segments = name.downcase.split(".")
_, *segments = name.downcase.split(".")
segments.map.with_index do |segment, index|
"." + segments[index..-1].join(".")

View File

@@ -6,7 +6,7 @@ require 'yaml'
module Linguist
# DEPRECATED Avoid mixing into Blob classes. Prefer functional interfaces
# like `Language.detect` over `Blob#language`. Functions are much easier to
# like `Linguist.detect` over `Blob#language`. Functions are much easier to
# cache and compose.
#
# Avoid adding additional bloat to this module.
@@ -325,7 +325,7 @@ module Linguist
#
# Returns a Language or nil if none is detected
def language
@language ||= Language.detect(self)
@language ||= Linguist.detect(self)
end
# Internal: Get the TextMate compatible scope for the blob

View File

@@ -57,9 +57,11 @@ module Linguist
composer_lock? ||
node_modules? ||
go_vendor? ||
npm_shrinkwrap? ||
godeps? ||
generated_by_zephir? ||
minified_files? ||
has_source_map? ||
source_map? ||
compiled_coffeescript? ||
generated_parser? ||
@@ -105,6 +107,21 @@ module Linguist
end
end
# Internal: Does the blob contain a source map reference?
#
# We assume that if one of the last 2 lines starts with a source map
# reference, then the current file was generated from other files.
#
# We use the last 2 lines because the last line might be empty.
#
# We only handle JavaScript, no CSS support yet.
#
# Returns true or false.
def has_source_map?
return false unless extname.downcase == '.js'
lines.last(2).any? { |line| line.start_with?('//# sourceMappingURL') }
end
# Internal: Is the blob a generated source map?
#
# Source Maps usually have .css.map or .js.map extensions. In case they
@@ -298,6 +315,13 @@ module Linguist
!!name.match(/\Avendor\//)
end
# Internal: Is the blob a generated npm shrinkwrap file.
#
# Returns true or false.
def npm_shrinkwrap?
!!name.match(/npm-shrinkwrap\.json/)
end
# Internal: Is the blob part of Godeps/,
# which are not meant for humans in pull requests.
#
@@ -350,14 +374,14 @@ module Linguist
# on the first line.
#
# GFortran module files contain:
# GFORTRAN module version 'x' created from
# GFORTRAN module version 'x' created from
# on the first line.
#
# Return true of false
def generated_module?
return false unless extname == '.mod'
return false unless lines.count > 1
return lines[0].include?("PCBNEW-LibModule-V") ||
return lines[0].include?("PCBNEW-LibModule-V") ||
lines[0].include?("GFORTRAN module version '")
end

View File

@@ -1,6 +1,3 @@
# Note: This file is included in the github-linguist-grammars gem, not the
# github-linguist gem.
module Linguist
module Grammars
# Get the path to the directory containing the language grammar JSON files.

View File

@@ -86,6 +86,14 @@ module Linguist
end
end
disambiguate ".builds" do |data|
if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
Language["XML"]
else
Language["Text"]
end
end
disambiguate ".ch" do |data|
if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
Language["xBase"]
@@ -127,11 +135,31 @@ module Linguist
Language["ECL"]
end
end
disambiguate ".es" do |data|
if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
Language["Erlang"]
elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
Language["JavaScript"]
end
end
disambiguate ".for", ".f" do |data|
fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
disambiguate ".f" do |data|
if /^: /.match(data)
Language["Forth"]
elsif /^([c*][^abd-z]| (subroutine|program|end)\s|\s*!)/i.match(data)
elsif data.include?("flowop")
Language["Filebench WML"]
elsif fortran_rx.match(data)
Language["FORTRAN"]
end
end
disambiguate ".for" do |data|
if /^: /.match(data)
Language["Forth"]
elsif fortran_rx.match(data)
Language["FORTRAN"]
end
end
@@ -171,6 +199,14 @@ module Linguist
end
end
disambiguate ".inc" do |data|
if /^<\?(?:php)?/.match(data)
Language["PHP"]
elsif /^\s*#(declare|local|macro|while)\s/.match(data)
Language["POV-Ray SDL"]
end
end
disambiguate ".l" do |data|
if /\(def(un|macro)\s/.match(data)
Language["Common Lisp"]
@@ -208,7 +244,7 @@ module Linguist
Language["MUF"]
elsif /^\s*;/.match(data)
Language["M"]
elsif /^\s*\(\*/.match(data)
elsif /\*\)$/.match(data)
Language["Mathematica"]
elsif /^\s*%/.match(data)
Language["Matlab"]
@@ -217,6 +253,14 @@ module Linguist
end
end
disambiguate ".md" do |data|
if /(^[-a-z0-9=#!\*\[|])|<\//i.match(data) || data.empty?
Language["Markdown"]
elsif /^(;;|\(define_)/.match(data)
Language["GCC machine description"]
end
end
disambiguate ".ml" do |data|
if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
Language["OCaml"]
@@ -313,14 +357,38 @@ module Linguist
end
end
disambiguate ".props" do |data|
if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
Language["XML"]
elsif /\w+\s*=\s*/i.match(data)
Language["INI"]
end
end
disambiguate ".r" do |data|
if /\bRebol\b/i.match(data)
Language["Rebol"]
elsif data.include?("<-")
elsif /<-|^\s*#/.match(data)
Language["R"]
end
end
disambiguate ".rno" do |data|
if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
Language["RUNOFF"]
elsif /^\.\\" /.match(data)
Language["Groff"]
end
end
disambiguate ".rpy" do |data|
if /(^(import|from|class|def)\s)/m.match(data)
Language["Python"]
else
Language["Ren'Py"]
end
end
disambiguate ".rs" do |data|
if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
Language["Rust"]
@@ -338,13 +406,13 @@ module Linguist
end
disambiguate ".sql" do |data|
if /^\\i\b|AS \$\$|LANGUAGE '+plpgsql'+/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
if /^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
#Postgres
Language["PLpgSQL"]
elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data) || /signal SQLSTATE '[0-9]+'/i.match(data)
#IBM db2
Language["SQLPL"]
elsif /pragma|\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
#Oracle
Language["PLSQL"]
elsif ! /begin|boolean|package|exception/i.match(data)
@@ -352,9 +420,31 @@ module Linguist
Language["SQL"]
end
end
disambiguate ".srt" do |data|
if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
Language["SubRip Text"]
end
end
disambiguate ".t" do |data|
if /^\s*%|^\s*var\s+\w+\s*:\s*\w+/.match(data)
Language["Turing"]
elsif /^\s*use\s+v6\s*;/.match(data)
Language["Perl6"]
end
end
disambiguate ".toc" do |data|
if /^## |@no-lib-strip@/.match(data)
Language["World of Warcraft Addon Data"]
elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
Language["TeX"]
end
end
disambiguate ".ts" do |data|
if data.include?("<TS ")
if data.include?("<TS")
Language["XML"]
else
Language["TypeScript"]

View File

@@ -20,10 +20,11 @@ module Linguist
#
# Languages are defined in `lib/linguist/languages.yml`.
class Language
@languages = []
@index = {}
@name_index = {}
@alias_index = {}
@languages = []
@index = {}
@name_index = {}
@alias_index = {}
@language_id_index = {}
@extension_index = Hash.new { |h,k| h[k] = [] }
@interpreter_index = Hash.new { |h,k| h[k] = [] }
@@ -84,17 +85,11 @@ module Linguist
@filename_index[filename] << language
end
@language_id_index[language.language_id] = language
language
end
STRATEGIES = [
Linguist::Strategy::Modeline,
Linguist::Shebang,
Linguist::Strategy::Filename,
Linguist::Heuristics,
Linguist::Classifier
]
# Public: Detects the Language of the blob.
#
# blob - an object that includes the Linguist `BlobHelper` interface;
@@ -102,34 +97,8 @@ module Linguist
#
# Returns Language or nil.
def self.detect(blob)
# Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty?
Linguist.instrument("linguist.detection", :blob => blob) do
# Call each strategy until one candidate is returned.
languages = []
returning_strategy = nil
STRATEGIES.each do |strategy|
returning_strategy = strategy
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
strategy.call(blob, languages)
end
if candidates.size == 1
languages = candidates
break
elsif candidates.size > 1
# More than one candidate was found, pass them to the next strategy.
languages = candidates
else
# No candidates, try the next strategy
end
end
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
languages.first
end
warn "[DEPRECATED] `Linguist::Language.detect` is deprecated. Use `Linguist.detect`. #{caller[0]}"
Linguist.detect(blob)
end
# Public: Get all Languages
@@ -227,6 +196,19 @@ module Linguist
@interpreter_index[interpreter]
end
# Public: Look up Languages by its language_id.
#
# language_id - Integer of language_id
#
# Examples
#
# Language.find_by_id(100)
# # => [#<Language name="Elixir">]
#
# Returns the matching Language
def self.find_by_id(language_id)
@language_id_index[language_id.to_i]
end
# Public: Look up Language by its name.
#
@@ -285,6 +267,7 @@ module Linguist
# Returns an Array of Languages.
def self.ace_modes
warn "This method will be deprecated in a future 5.x release. Every language now has an `ace_mode` set."
warn caller
@ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
end
@@ -318,11 +301,16 @@ module Linguist
end
@ace_mode = attributes[:ace_mode]
@codemirror_mode = attributes[:codemirror_mode]
@codemirror_mime_type = attributes[:codemirror_mime_type]
@wrap = attributes[:wrap] || false
# Set legacy search term
@search_term = attributes[:search_term] || default_alias_name
# Set the language_id
@language_id = attributes[:language_id]
# Set extensions or default to [].
@extensions = attributes[:extensions] || []
@interpreters = attributes[:interpreters] || []
@@ -385,6 +373,17 @@ module Linguist
# Returns the name String
attr_reader :search_term
# Public: Get language_id (used in GitHub search)
#
# Examples
#
# # => "1"
# # => "2"
# # => "3"
#
# Returns the integer language_id
attr_reader :language_id
# Public: Get the name of a TextMate-compatible scope
#
# Returns the scope
@@ -401,6 +400,31 @@ module Linguist
# Returns a String name or nil
attr_reader :ace_mode
# Public: Get CodeMirror mode
#
# Maps to a directory in the `mode/` source code.
# https://github.com/codemirror/CodeMirror/tree/master/mode
#
# Examples
#
# # => "nil"
# # => "javascript"
# # => "clike"
#
# Returns a String name or nil
attr_reader :codemirror_mode
# Public: Get CodeMirror MIME type mode
#
# Examples
#
# # => "nil"
# # => "text/x-javascript"
# # => "text/x-csrc"
#
# Returns a String name or nil
attr_reader :codemirror_mime_type
# Public: Should language lines be wrapped
#
# Returns true or false
@@ -577,10 +601,13 @@ module Linguist
:aliases => options['aliases'],
:tm_scope => options['tm_scope'],
:ace_mode => options['ace_mode'],
:codemirror_mode => options['codemirror_mode'],
:codemirror_mime_type => options['codemirror_mime_type'],
:wrap => options['wrap'],
:group_name => options['group'],
:searchable => options.fetch('searchable', true),
:search_term => options['search_term'],
:language_id => options['language_id'],
:extensions => Array(options['extensions']),
:interpreters => options['interpreters'].sort,
:filenames => options['filenames'],

3679
lib/linguist/languages.yml Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -28,6 +28,7 @@ module Linguist
@oid = oid
@path = path
@mode = mode
@data = nil
end
def git_attributes

View File

@@ -30,6 +30,9 @@ module Linguist
@repository = repo
@commit_oid = commit_oid
@old_commit_oid = nil
@old_stats = nil
raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
end

View File

@@ -21,7 +21,7 @@ module Linguist
def self.cache
@cache ||= begin
serializer = defined?(Yajl) ? Yajl : YAML
serializer.load(File.read(PATH))
serializer.load(File.read(PATH, encoding: 'utf-8'))
end
end

View File

@@ -42,10 +42,10 @@ module Linguist
return unless script
# "python2.6" -> "python2"
script.sub! /(\.\d+)$/, ''
script.sub!(/(\.\d+)$/, '')
# #! perl -> perl
script.sub! /^#!\s*/, ''
script.sub!(/^#!\s*/, '')
# Check for multiline shebang hacks that call `exec`
if script == 'sh' &&

View File

@@ -1,19 +1,102 @@
module Linguist
module Strategy
class Modeline
EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
EMACS_MODELINE = /
-\*-
(?:
# Short form: `-*- ruby -*-`
\s* (?= [^:;\s]+ \s* -\*-)
|
# Longer form: `-*- foo:bar; mode: ruby; -*-`
(?:
.*? # Preceding variables: `-*- foo:bar bar:baz;`
[;\s] # Which are delimited by spaces or semicolons
|
(?<=-\*-) # Not preceded by anything: `-*-mode:ruby-*-`
)
mode # Major mode indicator
\s*:\s* # Allow whitespace around colon: `mode : ruby`
)
([^:;\s]+) # Name of mode
# First form vim modeline
# [text]{white}{vi:|vim:|ex:}[white]{options}
# ex: 'vim: syntax=ruby'
VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i
# Ensure the mode is terminated correctly
(?=
# Followed by semicolon or whitespace
[\s;]
|
# Touching the ending sequence: `ruby-*-`
(?<![-*]) # Don't allow stuff like `ruby--*-` to match; it'll invalidate the mode
-\*- # Emacs has no problems reading `ruby --*-`, however.
)
.*? # Anything between a cleanly-terminated mode and the ending -*-
-\*-
/xi
# Second form vim modeline (compatible with some versions of Vi)
# [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
# ex: 'vim set syntax=ruby:'
VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i
VIM_MODELINE = /
MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]
# Start modeline. Could be `vim:`, `vi:` or `ex:`
(?:
(?:\s|^)
vi
(?:m[<=>]?\d+|m)? # Version-specific modeline
|
[\t\x20] # `ex:` requires whitespace, because "ex:" might be short for "example:"
ex
)
# If the option-list begins with `set ` or `se `, it indicates an alternative
# modeline syntax partly-compatible with older versions of Vi. Here, the colon
# serves as a terminator for an option sequence, delimited by whitespace.
(?=
# So we have to ensure the modeline ends with a colon
: (?=\s* set? \s [^\n:]+ :) |
# Otherwise, it isn't valid syntax and should be ignored
: (?!\s* set? \s)
)
# Possible (unrelated) `option=value` pairs to skip past
(?:
# Option separator. Vim uses whitespace or colons to separate options (except if
# the alternate "vim: set " form is used, where only whitespace is used)
(?:
\s
|
\s* : \s* # Note that whitespace around colons is accepted too:
) # vim: noai : ft=ruby:noexpandtab
# Option's name. All recognised Vim options have an alphanumeric form.
\w*
# Possible value. Not every option takes an argument.
(?:
# Whitespace between name and value is allowed: `vim: ft =ruby`
\s*=
# Option's value. Might be blank; `vim: ft= ` says "use no filetype".
(?:
[^\\\s] # Beware of escaped characters: titlestring=\ ft=ruby
| # will be read by Vim as { titlestring: " ft=ruby" }.
\\.
)*
)?
)*
# The actual filetype declaration
[\s:] (?:filetype|ft|syntax) \s*=
# Language's name
(\w+)
# Ensure it's followed by a legal separator
(?=\s|:|$)
/xi
MODELINES = [EMACS_MODELINE, VIM_MODELINE]
# Scope of the search for modelines
# Number of lines to check at the beginning and at the end of the file
SEARCH_SCOPE = 5
# Public: Detects language based on Vim and Emacs modelines
#
@@ -26,7 +109,9 @@ module Linguist
# Returns an Array with one Language if the blob has a Vim or Emacs modeline
# that matches a Language name or alias. Returns an empty array if no match.
def self.call(blob, _ = nil)
Array(Language.find_by_alias(modeline(blob.data)))
header = blob.lines.first(SEARCH_SCOPE).join("\n")
footer = blob.lines.last(SEARCH_SCOPE).join("\n")
Array(Language.find_by_alias(modeline(header + footer)))
end
# Public: Get the modeline from the first n-lines of the file

View File

@@ -15,15 +15,25 @@
# Dependencies
- ^[Dd]ependencies/
# Distributions
- (^|/)dist/
# C deps
# https://github.com/joyent/node
- ^deps/
- ^tools/
- (^|/)configure$
- (^|/)configure.ac$
- (^|/)config.guess$
- (^|/)config.sub$
# stuff autogenerated by autoconf - still C deps
- (^|/)aclocal.m4
- (^|/)libtool.m4
- (^|/)ltoptions.m4
- (^|/)ltsugar.m4
- (^|/)ltversion.m4
- (^|/)lt~obsolete.m4
# Linters
- cpplint.py
@@ -146,13 +156,19 @@
- (^|/)tiny_mce([^.]*)\.js$
- (^|/)tiny_mce/(langs|plugins|themes|utils)
# Ace Editor
- (^|/)ace-builds/
# Fontello CSS files
- (^|/)fontello(.*?)\.css$
# MathJax
- (^|/)MathJax/
# Chart.js
- (^|/)Chart\.js$
# Codemirror
# CodeMirror
- (^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)
# SyntaxHighlighter - http://alexgorbatchev.com/
@@ -183,6 +199,7 @@
# django
- (^|/)admin_media/
- (^|/)env/
# Fabric
- ^fabfile\.py$
@@ -215,6 +232,9 @@
# Fabric
- Fabric.framework/
# BuddyBuild
- BuddyBuildSDK.framework/
# git config files
- gitattributes$
- gitignore$
@@ -302,3 +322,6 @@
# Android Google APIs
- (^|/)\.google_apis/
# Jenkins Pipeline
- ^Jenkinsfile$

View File

@@ -1,3 +1,3 @@
module Linguist
VERSION = "4.7.5"
VERSION = "4.8.16"
end