mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge branch 'master' into go-vendor
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
require 'linguist/blob_helper'
|
||||
require 'linguist/generated'
|
||||
require 'linguist/grammars'
|
||||
require 'linguist/heuristics'
|
||||
require 'linguist/language'
|
||||
require 'linguist/repository'
|
||||
@@ -8,8 +9,85 @@ require 'linguist/shebang'
|
||||
require 'linguist/version'
|
||||
|
||||
class << Linguist
|
||||
# Public: Detects the Language of the blob.
|
||||
#
|
||||
# blob - an object that includes the Linguist `BlobHelper` interface;
|
||||
# see Linguist::LazyBlob and Linguist::FileBlob for examples
|
||||
#
|
||||
# Returns Language or nil.
|
||||
def detect(blob)
|
||||
# Bail early if the blob is binary or empty.
|
||||
return nil if blob.likely_binary? || blob.binary? || blob.empty?
|
||||
|
||||
Linguist.instrument("linguist.detection", :blob => blob) do
|
||||
# Call each strategy until one candidate is returned.
|
||||
languages = []
|
||||
returning_strategy = nil
|
||||
|
||||
STRATEGIES.each do |strategy|
|
||||
returning_strategy = strategy
|
||||
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
|
||||
strategy.call(blob, languages)
|
||||
end
|
||||
if candidates.size == 1
|
||||
languages = candidates
|
||||
break
|
||||
elsif candidates.size > 1
|
||||
# More than one candidate was found, pass them to the next strategy.
|
||||
languages = candidates
|
||||
else
|
||||
# No candidates, try the next strategy
|
||||
end
|
||||
end
|
||||
|
||||
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
|
||||
|
||||
languages.first
|
||||
end
|
||||
end
|
||||
|
||||
# Internal: The strategies used to detect the language of a file.
|
||||
#
|
||||
# A strategy is an object that has a `.call` method that takes two arguments:
|
||||
#
|
||||
# blob - An object that quacks like a blob.
|
||||
# languages - An Array of candidate Language objects that were returned by the
|
||||
# previous strategy.
|
||||
#
|
||||
# A strategy should return an Array of Language candidates.
|
||||
#
|
||||
# Strategies are called in turn until a single Language is returned.
|
||||
STRATEGIES = [
|
||||
Linguist::Strategy::Modeline,
|
||||
Linguist::Shebang,
|
||||
Linguist::Strategy::Filename,
|
||||
Linguist::Heuristics,
|
||||
Linguist::Classifier
|
||||
]
|
||||
|
||||
# Public: Set an instrumenter.
|
||||
#
|
||||
# class CustomInstrumenter
|
||||
# def instrument(name, payload = {})
|
||||
# warn "Instrumenting #{name}: #{payload[:blob]}"
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Linguist.instrumenter = CustomInstrumenter
|
||||
#
|
||||
# The instrumenter must conform to the `ActiveSupport::Notifications`
|
||||
# interface, which defines `#instrument` and accepts:
|
||||
#
|
||||
# name - the String name of the event (e.g. "linguist.detected")
|
||||
# payload - a Hash of the exception context.
|
||||
attr_accessor :instrumenter
|
||||
|
||||
# Internal: Perform instrumentation on a block
|
||||
#
|
||||
# Linguist.instrument("linguist.dosomething", :blob => blob) do
|
||||
# # logic to instrument here.
|
||||
# end
|
||||
#
|
||||
def instrument(*args, &bk)
|
||||
if instrumenter
|
||||
instrumenter.instrument(*args, &bk)
|
||||
@@ -17,4 +95,5 @@ class << Linguist
|
||||
yield
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
@@ -63,7 +63,7 @@ module Linguist
|
||||
#
|
||||
# Returns an Array
|
||||
def extensions
|
||||
basename, *segments = name.downcase.split(".")
|
||||
_, *segments = name.downcase.split(".")
|
||||
|
||||
segments.map.with_index do |segment, index|
|
||||
"." + segments[index..-1].join(".")
|
||||
|
||||
@@ -6,7 +6,7 @@ require 'yaml'
|
||||
|
||||
module Linguist
|
||||
# DEPRECATED Avoid mixing into Blob classes. Prefer functional interfaces
|
||||
# like `Language.detect` over `Blob#language`. Functions are much easier to
|
||||
# like `Linguist.detect` over `Blob#language`. Functions are much easier to
|
||||
# cache and compose.
|
||||
#
|
||||
# Avoid adding additional bloat to this module.
|
||||
@@ -325,7 +325,7 @@ module Linguist
|
||||
#
|
||||
# Returns a Language or nil if none is detected
|
||||
def language
|
||||
@language ||= Language.detect(self)
|
||||
@language ||= Linguist.detect(self)
|
||||
end
|
||||
|
||||
# Internal: Get the TextMate compatible scope for the blob
|
||||
|
||||
@@ -57,9 +57,11 @@ module Linguist
|
||||
composer_lock? ||
|
||||
node_modules? ||
|
||||
go_vendor? ||
|
||||
npm_shrinkwrap? ||
|
||||
godeps? ||
|
||||
generated_by_zephir? ||
|
||||
minified_files? ||
|
||||
has_source_map? ||
|
||||
source_map? ||
|
||||
compiled_coffeescript? ||
|
||||
generated_parser? ||
|
||||
@@ -105,6 +107,21 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
# Internal: Does the blob contain a source map reference?
|
||||
#
|
||||
# We assume that if one of the last 2 lines starts with a source map
|
||||
# reference, then the current file was generated from other files.
|
||||
#
|
||||
# We use the last 2 lines because the last line might be empty.
|
||||
#
|
||||
# We only handle JavaScript, no CSS support yet.
|
||||
#
|
||||
# Returns true or false.
|
||||
def has_source_map?
|
||||
return false unless extname.downcase == '.js'
|
||||
lines.last(2).any? { |line| line.start_with?('//# sourceMappingURL') }
|
||||
end
|
||||
|
||||
# Internal: Is the blob a generated source map?
|
||||
#
|
||||
# Source Maps usually have .css.map or .js.map extensions. In case they
|
||||
@@ -298,6 +315,13 @@ module Linguist
|
||||
!!name.match(/\Avendor\//)
|
||||
end
|
||||
|
||||
# Internal: Is the blob a generated npm shrinkwrap file.
|
||||
#
|
||||
# Returns true or false.
|
||||
def npm_shrinkwrap?
|
||||
!!name.match(/npm-shrinkwrap\.json/)
|
||||
end
|
||||
|
||||
# Internal: Is the blob part of Godeps/,
|
||||
# which are not meant for humans in pull requests.
|
||||
#
|
||||
@@ -350,14 +374,14 @@ module Linguist
|
||||
# on the first line.
|
||||
#
|
||||
# GFortran module files contain:
|
||||
# GFORTRAN module version 'x' created from
|
||||
# GFORTRAN module version 'x' created from
|
||||
# on the first line.
|
||||
#
|
||||
# Returns true or false.
|
||||
def generated_module?
|
||||
return false unless extname == '.mod'
|
||||
return false unless lines.count > 1
|
||||
return lines[0].include?("PCBNEW-LibModule-V") ||
|
||||
return lines[0].include?("PCBNEW-LibModule-V") ||
|
||||
lines[0].include?("GFORTRAN module version '")
|
||||
end
|
||||
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
# Note: This file is included in the github-linguist-grammars gem, not the
|
||||
# github-linguist gem.
|
||||
|
||||
module Linguist
|
||||
module Grammars
|
||||
# Get the path to the directory containing the language grammar JSON files.
|
||||
|
||||
@@ -86,6 +86,14 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
# Heuristic for the ".builds" extension: XML versus plain text.
#
# data - String contents of the blob being classified.
#
# Picks Language["XML"] when some line starts (after optional whitespace)
# with an XML declaration, an `xmlns` attribute, or a <Project>, <Import>
# or <Property> tag; otherwise falls back to Language["Text"].
disambiguate ".builds" do |data|
  # The "?" of "<?xml" has to be escaped. Left bare it is a quantifier that
  # makes "<" optional, so the alternative would match "xml" / "<xml" but
  # never an actual "<?xml ...?>" declaration.
  if /^(\s*)(<Project|<Import|<Property|<\?xml|xmlns)/i.match(data)
    Language["XML"]
  else
    Language["Text"]
  end
end
|
||||
|
||||
disambiguate ".ch" do |data|
|
||||
if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
|
||||
Language["xBase"]
|
||||
@@ -127,11 +135,31 @@ module Linguist
|
||||
Language["ECL"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".es" do |data|
|
||||
if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
|
||||
Language["Erlang"]
|
||||
elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
|
||||
Language["JavaScript"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".for", ".f" do |data|
|
||||
fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
|
||||
|
||||
disambiguate ".f" do |data|
|
||||
if /^: /.match(data)
|
||||
Language["Forth"]
|
||||
elsif /^([c*][^abd-z]| (subroutine|program|end)\s|\s*!)/i.match(data)
|
||||
elsif data.include?("flowop")
|
||||
Language["Filebench WML"]
|
||||
elsif fortran_rx.match(data)
|
||||
Language["FORTRAN"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".for" do |data|
|
||||
if /^: /.match(data)
|
||||
Language["Forth"]
|
||||
elsif fortran_rx.match(data)
|
||||
Language["FORTRAN"]
|
||||
end
|
||||
end
|
||||
@@ -171,6 +199,14 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".inc" do |data|
|
||||
if /^<\?(?:php)?/.match(data)
|
||||
Language["PHP"]
|
||||
elsif /^\s*#(declare|local|macro|while)\s/.match(data)
|
||||
Language["POV-Ray SDL"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".l" do |data|
|
||||
if /\(def(un|macro)\s/.match(data)
|
||||
Language["Common Lisp"]
|
||||
@@ -208,7 +244,7 @@ module Linguist
|
||||
Language["MUF"]
|
||||
elsif /^\s*;/.match(data)
|
||||
Language["M"]
|
||||
elsif /^\s*\(\*/.match(data)
|
||||
elsif /\*\)$/.match(data)
|
||||
Language["Mathematica"]
|
||||
elsif /^\s*%/.match(data)
|
||||
Language["Matlab"]
|
||||
@@ -217,6 +253,14 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".md" do |data|
|
||||
if /(^[-a-z0-9=#!\*\[|])|<\//i.match(data) || data.empty?
|
||||
Language["Markdown"]
|
||||
elsif /^(;;|\(define_)/.match(data)
|
||||
Language["GCC machine description"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".ml" do |data|
|
||||
if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
|
||||
Language["OCaml"]
|
||||
@@ -313,14 +357,38 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
# Heuristic for the ".props" extension: XML versus INI.
#
# data - String contents of the blob being classified.
#
# Picks Language["XML"] when some line starts (after optional whitespace)
# with an XML declaration, an `xmlns` attribute, or a <Project>, <Import>
# or <Property> tag. Otherwise picks Language["INI"] when the data contains
# a `key =` style assignment. Yields nil when neither pattern matches.
disambiguate ".props" do |data|
  # The "?" of "<?xml" has to be escaped. Left bare it is a quantifier that
  # makes "<" optional, so the alternative would match "xml" / "<xml" but
  # never an actual "<?xml ...?>" declaration.
  if /^(\s*)(<Project|<Import|<Property|<\?xml|xmlns)/i.match(data)
    Language["XML"]
  elsif /\w+\s*=\s*/i.match(data)
    Language["INI"]
  end
end
|
||||
|
||||
disambiguate ".r" do |data|
|
||||
if /\bRebol\b/i.match(data)
|
||||
Language["Rebol"]
|
||||
elsif data.include?("<-")
|
||||
elsif /<-|^\s*#/.match(data)
|
||||
Language["R"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".rno" do |data|
|
||||
if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
|
||||
Language["RUNOFF"]
|
||||
elsif /^\.\\" /.match(data)
|
||||
Language["Groff"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".rpy" do |data|
|
||||
if /(^(import|from|class|def)\s)/m.match(data)
|
||||
Language["Python"]
|
||||
else
|
||||
Language["Ren'Py"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".rs" do |data|
|
||||
if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
|
||||
Language["Rust"]
|
||||
@@ -338,13 +406,13 @@ module Linguist
|
||||
end
|
||||
|
||||
disambiguate ".sql" do |data|
|
||||
if /^\\i\b|AS \$\$|LANGUAGE '+plpgsql'+/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
|
||||
if /^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
|
||||
#Postgres
|
||||
Language["PLpgSQL"]
|
||||
elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data) || /signal SQLSTATE '[0-9]+'/i.match(data)
|
||||
#IBM db2
|
||||
Language["SQLPL"]
|
||||
elsif /pragma|\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
|
||||
elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
|
||||
#Oracle
|
||||
Language["PLSQL"]
|
||||
elsif ! /begin|boolean|package|exception/i.match(data)
|
||||
@@ -352,9 +420,31 @@ module Linguist
|
||||
Language["SQL"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".srt" do |data|
|
||||
if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
|
||||
Language["SubRip Text"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".t" do |data|
|
||||
if /^\s*%|^\s*var\s+\w+\s*:\s*\w+/.match(data)
|
||||
Language["Turing"]
|
||||
elsif /^\s*use\s+v6\s*;/.match(data)
|
||||
Language["Perl6"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".toc" do |data|
|
||||
if /^## |@no-lib-strip@/.match(data)
|
||||
Language["World of Warcraft Addon Data"]
|
||||
elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
|
||||
Language["TeX"]
|
||||
end
|
||||
end
|
||||
|
||||
disambiguate ".ts" do |data|
|
||||
if data.include?("<TS ")
|
||||
if data.include?("<TS")
|
||||
Language["XML"]
|
||||
else
|
||||
Language["TypeScript"]
|
||||
|
||||
@@ -20,10 +20,11 @@ module Linguist
|
||||
#
|
||||
# Languages are defined in `lib/linguist/languages.yml`.
|
||||
class Language
|
||||
@languages = []
|
||||
@index = {}
|
||||
@name_index = {}
|
||||
@alias_index = {}
|
||||
@languages = []
|
||||
@index = {}
|
||||
@name_index = {}
|
||||
@alias_index = {}
|
||||
@language_id_index = {}
|
||||
|
||||
@extension_index = Hash.new { |h,k| h[k] = [] }
|
||||
@interpreter_index = Hash.new { |h,k| h[k] = [] }
|
||||
@@ -84,17 +85,11 @@ module Linguist
|
||||
@filename_index[filename] << language
|
||||
end
|
||||
|
||||
@language_id_index[language.language_id] = language
|
||||
|
||||
language
|
||||
end
|
||||
|
||||
STRATEGIES = [
|
||||
Linguist::Strategy::Modeline,
|
||||
Linguist::Shebang,
|
||||
Linguist::Strategy::Filename,
|
||||
Linguist::Heuristics,
|
||||
Linguist::Classifier
|
||||
]
|
||||
|
||||
# Public: Detects the Language of the blob.
|
||||
#
|
||||
# blob - an object that includes the Linguist `BlobHelper` interface;
|
||||
@@ -102,34 +97,8 @@ module Linguist
|
||||
#
|
||||
# Returns Language or nil.
|
||||
def self.detect(blob)
|
||||
# Bail early if the blob is binary or empty.
|
||||
return nil if blob.likely_binary? || blob.binary? || blob.empty?
|
||||
|
||||
Linguist.instrument("linguist.detection", :blob => blob) do
|
||||
# Call each strategy until one candidate is returned.
|
||||
languages = []
|
||||
returning_strategy = nil
|
||||
|
||||
STRATEGIES.each do |strategy|
|
||||
returning_strategy = strategy
|
||||
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
|
||||
strategy.call(blob, languages)
|
||||
end
|
||||
if candidates.size == 1
|
||||
languages = candidates
|
||||
break
|
||||
elsif candidates.size > 1
|
||||
# More than one candidate was found, pass them to the next strategy.
|
||||
languages = candidates
|
||||
else
|
||||
# No candidates, try the next strategy
|
||||
end
|
||||
end
|
||||
|
||||
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
|
||||
|
||||
languages.first
|
||||
end
|
||||
warn "[DEPRECATED] `Linguist::Language.detect` is deprecated. Use `Linguist.detect`. #{caller[0]}"
|
||||
Linguist.detect(blob)
|
||||
end
|
||||
|
||||
# Public: Get all Languages
|
||||
@@ -227,6 +196,19 @@ module Linguist
|
||||
@interpreter_index[interpreter]
|
||||
end
|
||||
|
||||
# Public: Look up a Language by its language_id.
|
||||
#
|
||||
# language_id - Integer of language_id
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# Language.find_by_id(100)
|
||||
# # => #<Language name="Elixir">
|
||||
#
|
||||
# Returns the matching Language
|
||||
def self.find_by_id(language_id)
|
||||
@language_id_index[language_id.to_i]
|
||||
end
|
||||
|
||||
# Public: Look up Language by its name.
|
||||
#
|
||||
@@ -285,6 +267,7 @@ module Linguist
|
||||
# Returns an Array of Languages.
|
||||
def self.ace_modes
|
||||
warn "This method will be deprecated in a future 5.x release. Every language now has an `ace_mode` set."
|
||||
warn caller
|
||||
@ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
|
||||
end
|
||||
|
||||
@@ -318,11 +301,16 @@ module Linguist
|
||||
end
|
||||
|
||||
@ace_mode = attributes[:ace_mode]
|
||||
@codemirror_mode = attributes[:codemirror_mode]
|
||||
@codemirror_mime_type = attributes[:codemirror_mime_type]
|
||||
@wrap = attributes[:wrap] || false
|
||||
|
||||
# Set legacy search term
|
||||
@search_term = attributes[:search_term] || default_alias_name
|
||||
|
||||
# Set the language_id
|
||||
@language_id = attributes[:language_id]
|
||||
|
||||
# Set extensions or default to [].
|
||||
@extensions = attributes[:extensions] || []
|
||||
@interpreters = attributes[:interpreters] || []
|
||||
@@ -385,6 +373,17 @@ module Linguist
|
||||
# Returns the name String
|
||||
attr_reader :search_term
|
||||
|
||||
# Public: Get language_id (used in GitHub search)
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# # => 1
|
||||
# # => 2
|
||||
# # => 3
|
||||
#
|
||||
# Returns the integer language_id
|
||||
attr_reader :language_id
|
||||
|
||||
# Public: Get the name of a TextMate-compatible scope
|
||||
#
|
||||
# Returns the scope
|
||||
@@ -401,6 +400,31 @@ module Linguist
|
||||
# Returns a String name or nil
|
||||
attr_reader :ace_mode
|
||||
|
||||
# Public: Get CodeMirror mode
|
||||
#
|
||||
# Maps to a directory in the `mode/` source code.
|
||||
# https://github.com/codemirror/CodeMirror/tree/master/mode
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# # => nil
|
||||
# # => "javascript"
|
||||
# # => "clike"
|
||||
#
|
||||
# Returns a String name or nil
|
||||
attr_reader :codemirror_mode
|
||||
|
||||
# Public: Get CodeMirror MIME type mode
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# # => nil
|
||||
# # => "text/x-javascript"
|
||||
# # => "text/x-csrc"
|
||||
#
|
||||
# Returns a String name or nil
|
||||
attr_reader :codemirror_mime_type
|
||||
|
||||
# Public: Should language lines be wrapped
|
||||
#
|
||||
# Returns true or false
|
||||
@@ -577,10 +601,13 @@ module Linguist
|
||||
:aliases => options['aliases'],
|
||||
:tm_scope => options['tm_scope'],
|
||||
:ace_mode => options['ace_mode'],
|
||||
:codemirror_mode => options['codemirror_mode'],
|
||||
:codemirror_mime_type => options['codemirror_mime_type'],
|
||||
:wrap => options['wrap'],
|
||||
:group_name => options['group'],
|
||||
:searchable => options.fetch('searchable', true),
|
||||
:search_term => options['search_term'],
|
||||
:language_id => options['language_id'],
|
||||
:extensions => Array(options['extensions']),
|
||||
:interpreters => options['interpreters'].sort,
|
||||
:filenames => options['filenames'],
|
||||
|
||||
3679
lib/linguist/languages.yml
Normal file → Executable file
3679
lib/linguist/languages.yml
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@@ -28,6 +28,7 @@ module Linguist
|
||||
@oid = oid
|
||||
@path = path
|
||||
@mode = mode
|
||||
@data = nil
|
||||
end
|
||||
|
||||
def git_attributes
|
||||
|
||||
@@ -30,6 +30,9 @@ module Linguist
|
||||
@repository = repo
|
||||
@commit_oid = commit_oid
|
||||
|
||||
@old_commit_oid = nil
|
||||
@old_stats = nil
|
||||
|
||||
raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
|
||||
end
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ module Linguist
|
||||
def self.cache
|
||||
@cache ||= begin
|
||||
serializer = defined?(Yajl) ? Yajl : YAML
|
||||
serializer.load(File.read(PATH))
|
||||
serializer.load(File.read(PATH, encoding: 'utf-8'))
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -42,10 +42,10 @@ module Linguist
|
||||
return unless script
|
||||
|
||||
# "python2.6" -> "python2"
|
||||
script.sub! /(\.\d+)$/, ''
|
||||
script.sub!(/(\.\d+)$/, '')
|
||||
|
||||
# #! perl -> perl
|
||||
script.sub! /^#!\s*/, ''
|
||||
script.sub!(/^#!\s*/, '')
|
||||
|
||||
# Check for multiline shebang hacks that call `exec`
|
||||
if script == 'sh' &&
|
||||
|
||||
@@ -1,19 +1,102 @@
|
||||
module Linguist
|
||||
module Strategy
|
||||
class Modeline
|
||||
EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
||||
EMACS_MODELINE = /
|
||||
-\*-
|
||||
(?:
|
||||
# Short form: `-*- ruby -*-`
|
||||
\s* (?= [^:;\s]+ \s* -\*-)
|
||||
|
|
||||
# Longer form: `-*- foo:bar; mode: ruby; -*-`
|
||||
(?:
|
||||
.*? # Preceding variables: `-*- foo:bar bar:baz;`
|
||||
[;\s] # Which are delimited by spaces or semicolons
|
||||
|
|
||||
(?<=-\*-) # Not preceded by anything: `-*-mode:ruby-*-`
|
||||
)
|
||||
mode # Major mode indicator
|
||||
\s*:\s* # Allow whitespace around colon: `mode : ruby`
|
||||
)
|
||||
([^:;\s]+) # Name of mode
|
||||
|
||||
# First form vim modeline
|
||||
# [text]{white}{vi:|vim:|ex:}[white]{options}
|
||||
# ex: 'vim: syntax=ruby'
|
||||
VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i
|
||||
# Ensure the mode is terminated correctly
|
||||
(?=
|
||||
# Followed by semicolon or whitespace
|
||||
[\s;]
|
||||
|
|
||||
# Touching the ending sequence: `ruby-*-`
|
||||
(?<![-*]) # Don't allow stuff like `ruby--*-` to match; it'll invalidate the mode
|
||||
-\*- # Emacs has no problems reading `ruby --*-`, however.
|
||||
)
|
||||
.*? # Anything between a cleanly-terminated mode and the ending -*-
|
||||
-\*-
|
||||
/xi
|
||||
|
||||
# Second form vim modeline (compatible with some versions of Vi)
|
||||
# [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
|
||||
# ex: 'vim set syntax=ruby:'
|
||||
VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i
|
||||
VIM_MODELINE = /
|
||||
|
||||
MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]
|
||||
# Start modeline. Could be `vim:`, `vi:` or `ex:`
|
||||
(?:
|
||||
(?:\s|^)
|
||||
vi
|
||||
(?:m[<=>]?\d+|m)? # Version-specific modeline
|
||||
|
|
||||
[\t\x20] # `ex:` requires whitespace, because "ex:" might be short for "example:"
|
||||
ex
|
||||
)
|
||||
|
||||
# If the option-list begins with `set ` or `se `, it indicates an alternative
|
||||
# modeline syntax partly-compatible with older versions of Vi. Here, the colon
|
||||
# serves as a terminator for an option sequence, delimited by whitespace.
|
||||
(?=
|
||||
# So we have to ensure the modeline ends with a colon
|
||||
: (?=\s* set? \s [^\n:]+ :) |
|
||||
|
||||
# Otherwise, it isn't valid syntax and should be ignored
|
||||
: (?!\s* set? \s)
|
||||
)
|
||||
|
||||
# Possible (unrelated) `option=value` pairs to skip past
|
||||
(?:
|
||||
# Option separator. Vim uses whitespace or colons to separate options (except if
|
||||
# the alternate "vim: set " form is used, where only whitespace is used)
|
||||
(?:
|
||||
\s
|
||||
|
|
||||
\s* : \s* # Note that whitespace around colons is accepted too:
|
||||
) # vim: noai : ft=ruby:noexpandtab
|
||||
|
||||
# Option's name. All recognised Vim options have an alphanumeric form.
|
||||
\w*
|
||||
|
||||
# Possible value. Not every option takes an argument.
|
||||
(?:
|
||||
# Whitespace between name and value is allowed: `vim: ft =ruby`
|
||||
\s*=
|
||||
|
||||
# Option's value. Might be blank; `vim: ft= ` says "use no filetype".
|
||||
(?:
|
||||
[^\\\s] # Beware of escaped characters: titlestring=\ ft=ruby
|
||||
| # will be read by Vim as { titlestring: " ft=ruby" }.
|
||||
\\.
|
||||
)*
|
||||
)?
|
||||
)*
|
||||
|
||||
# The actual filetype declaration
|
||||
[\s:] (?:filetype|ft|syntax) \s*=
|
||||
|
||||
# Language's name
|
||||
(\w+)
|
||||
|
||||
# Ensure it's followed by a legal separator
|
||||
(?=\s|:|$)
|
||||
/xi
|
||||
|
||||
MODELINES = [EMACS_MODELINE, VIM_MODELINE]
|
||||
|
||||
# Scope of the search for modelines
|
||||
# Number of lines to check at the beginning and at the end of the file
|
||||
SEARCH_SCOPE = 5
|
||||
|
||||
# Public: Detects language based on Vim and Emacs modelines
|
||||
#
|
||||
@@ -26,7 +109,9 @@ module Linguist
|
||||
# Returns an Array with one Language if the blob has a Vim or Emacs modeline
|
||||
# that matches a Language name or alias. Returns an empty array if no match.
|
||||
def self.call(blob, _ = nil)
|
||||
Array(Language.find_by_alias(modeline(blob.data)))
|
||||
header = blob.lines.first(SEARCH_SCOPE).join("\n")
|
||||
footer = blob.lines.last(SEARCH_SCOPE).join("\n")
|
||||
Array(Language.find_by_alias(modeline(header + footer)))
|
||||
end
|
||||
|
||||
# Public: Get the modeline from the first n-lines of the file
|
||||
|
||||
@@ -15,15 +15,25 @@
|
||||
# Dependencies
|
||||
- ^[Dd]ependencies/
|
||||
|
||||
# Distributions
|
||||
- (^|/)dist/
|
||||
|
||||
# C deps
|
||||
# https://github.com/joyent/node
|
||||
- ^deps/
|
||||
- ^tools/
|
||||
- (^|/)configure$
|
||||
- (^|/)configure.ac$
|
||||
- (^|/)config.guess$
|
||||
- (^|/)config.sub$
|
||||
|
||||
# stuff autogenerated by autoconf - still C deps
|
||||
- (^|/)aclocal.m4
|
||||
- (^|/)libtool.m4
|
||||
- (^|/)ltoptions.m4
|
||||
- (^|/)ltsugar.m4
|
||||
- (^|/)ltversion.m4
|
||||
- (^|/)lt~obsolete.m4
|
||||
|
||||
# Linters
|
||||
- cpplint.py
|
||||
|
||||
@@ -146,13 +156,19 @@
|
||||
- (^|/)tiny_mce([^.]*)\.js$
|
||||
- (^|/)tiny_mce/(langs|plugins|themes|utils)
|
||||
|
||||
# Ace Editor
|
||||
- (^|/)ace-builds/
|
||||
|
||||
# Fontello CSS files
|
||||
- (^|/)fontello(.*?)\.css$
|
||||
|
||||
# MathJax
|
||||
- (^|/)MathJax/
|
||||
|
||||
# Chart.js
|
||||
- (^|/)Chart\.js$
|
||||
|
||||
# Codemirror
|
||||
# CodeMirror
|
||||
- (^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)
|
||||
|
||||
# SyntaxHighlighter - http://alexgorbatchev.com/
|
||||
@@ -183,6 +199,7 @@
|
||||
|
||||
# django
|
||||
- (^|/)admin_media/
|
||||
- (^|/)env/
|
||||
|
||||
# Fabric
|
||||
- ^fabfile\.py$
|
||||
@@ -215,6 +232,9 @@
|
||||
# Fabric
|
||||
- Fabric.framework/
|
||||
|
||||
# BuddyBuild
|
||||
- BuddyBuildSDK.framework/
|
||||
|
||||
# git config files
|
||||
- gitattributes$
|
||||
- gitignore$
|
||||
@@ -302,3 +322,6 @@
|
||||
|
||||
# Android Google APIs
|
||||
- (^|/)\.google_apis/
|
||||
|
||||
# Jenkins Pipeline
|
||||
- ^Jenkinsfile$
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
module Linguist
|
||||
VERSION = "4.7.5"
|
||||
VERSION = "4.8.16"
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user