Merge branch 'master' into go-vendor

2025-10-29 17:50:22 +00:00 · 2016-10-26 18:34:02 -04:00
parent 789607d9bc 346aa99fcf
commit 4efc6f8c95
719 changed files with 83506 additions and 9105 deletions
--- a/lib/linguist.rb
+++ b/lib/linguist.rb
@@ -1,5 +1,6 @@
 require 'linguist/blob_helper'
 require 'linguist/generated'
+require 'linguist/grammars'
 require 'linguist/heuristics'
 require 'linguist/language'
 require 'linguist/repository'
@@ -8,8 +9,85 @@ require 'linguist/shebang'
 require 'linguist/version'

 class << Linguist
+  # Public: Detects the Language of the blob.
+  #
+  # blob - an object that includes the Linguist `BlobHelper` interface;
+  #       see Linguist::LazyBlob and Linguist::FileBlob for examples
+  #
+  # Returns Language or nil.
+  def detect(blob)
+    # Bail early if the blob is binary or empty.
+    return nil if blob.likely_binary? || blob.binary? || blob.empty?
+
+    Linguist.instrument("linguist.detection", :blob => blob) do
+      # Call each strategy until one candidate is returned.
+      languages = []
+      returning_strategy = nil
+
+      STRATEGIES.each do |strategy|
+        returning_strategy = strategy
+        candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
+          strategy.call(blob, languages)
+        end
+        if candidates.size == 1
+          languages = candidates
+          break
+        elsif candidates.size > 1
+          # More than one candidate was found, pass them to the next strategy.
+          languages = candidates
+        else
+          # No candidates, try the next strategy
+        end
+      end
+
+      Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
+
+      languages.first
+    end
+  end
+
+  # Internal: The strategies used to detect the language of a file.
+  #
+  # A strategy is an object that has a `.call` method that takes two arguments:
+  #
+  #   blob - An object that quacks like a blob.
+  #   languages - An Array of candidate Language objects that were returned by the
+  #               previous strategy.
+  #
+  # A strategy should return an Array of Language candidates.
+  #
+  # Strategies are called in turn until a single Language is returned.
+  STRATEGIES = [
+    Linguist::Strategy::Modeline,
+    Linguist::Shebang,
+    Linguist::Strategy::Filename,
+    Linguist::Heuristics,
+    Linguist::Classifier
+  ]
+
+  # Public: Set an instrumenter.
+  #
+  #     class CustomInstrumenter
+  #       def instrument(name, payload = {})
+  #         warn "Instrumenting #{name}: #{payload[:blob]}"
+  #       end
+  #     end
+  #
+  #     Linguist.instrumenter = CustomInstrumenter
+  #
+  # The instrumenter must conform to the `ActiveSupport::Notifications`
+  # interface, which defines `#instrument` and accepts:
+  #
+  # name    - the String name of the event (e.g. "linguist.detected")
+  # payload - a Hash of event context (e.g. :blob, :strategy, :language).
  attr_accessor :instrumenter

+  # Internal: Perform instrumentation on a block
+  #
+  #     Linguist.instrument("linguist.dosomething", :blob => blob) do
+  #       # logic to instrument here.
+  #     end
+  #
  def instrument(*args, &bk)
    if instrumenter
      instrumenter.instrument(*args, &bk)
@@ -17,4 +95,5 @@ class << Linguist
      yield
    end
  end
+
 end
--- a/lib/linguist/blob.rb
+++ b/lib/linguist/blob.rb
@@ -63,7 +63,7 @@ module Linguist
    #
    # Returns an Array
    def extensions
-      basename, *segments = name.downcase.split(".")
+      _, *segments = name.downcase.split(".")

      segments.map.with_index do |segment, index|
        "." + segments[index..-1].join(".")
--- a/lib/linguist/blob_helper.rb
+++ b/lib/linguist/blob_helper.rb
@@ -6,7 +6,7 @@ require 'yaml'

 module Linguist
  # DEPRECATED Avoid mixing into Blob classes. Prefer functional interfaces
-  # like `Language.detect` over `Blob#language`. Functions are much easier to
+  # like `Linguist.detect` over `Blob#language`. Functions are much easier to
  # cache and compose.
  #
  # Avoid adding additional bloat to this module.
@@ -325,7 +325,7 @@ module Linguist
    #
    # Returns a Language or nil if none is detected
    def language
-      @language ||= Language.detect(self)
+      @language ||= Linguist.detect(self)
    end

    # Internal: Get the TextMate compatible scope for the blob
--- a/lib/linguist/generated.rb
+++ b/lib/linguist/generated.rb
@@ -57,9 +57,11 @@ module Linguist
      composer_lock? ||
      node_modules? ||
      go_vendor? ||
+      npm_shrinkwrap? ||
      godeps? ||
      generated_by_zephir? ||
      minified_files? ||
+      has_source_map? ||
      source_map? ||
      compiled_coffeescript? ||
      generated_parser? ||
@@ -105,6 +107,21 @@ module Linguist
      end
    end

+    # Internal: Does the blob contain a source map reference?
+    #
+    # We assume that if one of the last 2 lines starts with a source map
+    # reference, then the current file was generated from other files.
+    #
+    # We use the last 2 lines because the last line might be empty.
+    #
+    # We only handle JavaScript, no CSS support yet.
+    #
+    # Returns true or false.
+    def has_source_map?
+      return false unless extname.downcase == '.js'
+      lines.last(2).any? { |line| line.start_with?('//# sourceMappingURL') }
+    end
+
    # Internal: Is the blob a generated source map?
    #
    # Source Maps usually have .css.map or .js.map extensions. In case they
@@ -298,6 +315,13 @@ module Linguist
      !!name.match(/\Avendor\//)
    end

+    # Internal: Is the blob a generated npm shrinkwrap file?
+    #
+    # Returns true or false.
+    def npm_shrinkwrap?
+      !!name.match(/npm-shrinkwrap\.json/)
+    end
+
    # Internal: Is the blob part of Godeps/,
    # which are not meant for humans in pull requests.
    #
@@ -350,14 +374,14 @@ module Linguist
    # on the first line.
    #
    # GFortran module files contain:
-    # GFORTRAN module version 'x' created from 
+    # GFORTRAN module version 'x' created from
    # on the first line.
    #
    # Return true of false
    def generated_module?
      return false unless extname == '.mod'
      return false unless lines.count > 1
-      return lines[0].include?("PCBNEW-LibModule-V") || 
+      return lines[0].include?("PCBNEW-LibModule-V") ||
              lines[0].include?("GFORTRAN module version '")
    end

--- a/lib/linguist/grammars.rb
+++ b/lib/linguist/grammars.rb
@@ -1,6 +1,3 @@
-# Note: This file is included in the github-linguist-grammars gem, not the
-# github-linguist gem.
-
 module Linguist
  module Grammars
    # Get the path to the directory containing the language grammar JSON files.
--- a/lib/linguist/heuristics.rb
+++ b/lib/linguist/heuristics.rb
@@ -86,6 +86,14 @@ module Linguist
      end
    end

+    disambiguate ".builds" do |data|
+      if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
+        Language["XML"]
+      else
+        Language["Text"]
+      end
+    end
+
    disambiguate ".ch" do |data|
      if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
        Language["xBase"]
@@ -127,11 +135,31 @@ module Linguist
        Language["ECL"]
      end
    end
+    
+    disambiguate ".es" do |data|
+      if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
+        Language["Erlang"]
+      elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
+        Language["JavaScript"]
+      end
+    end

-    disambiguate ".for", ".f" do |data|
+    fortran_rx = /^([c*][^abd-z]|      (subroutine|program|end|data)\s|\s*!)/i
+
+    disambiguate ".f" do |data|
      if /^: /.match(data)
        Language["Forth"]
-      elsif /^([c*][^abd-z]|      (subroutine|program|end)\s|\s*!)/i.match(data)
+      elsif data.include?("flowop")
+        Language["Filebench WML"]
+      elsif fortran_rx.match(data)
+        Language["FORTRAN"]
+      end
+    end
+
+    disambiguate ".for" do |data|
+      if /^: /.match(data)
+        Language["Forth"]
+      elsif fortran_rx.match(data)
        Language["FORTRAN"]
      end
    end
@@ -171,6 +199,14 @@ module Linguist
      end
    end

+    disambiguate ".inc" do |data|
+      if /^<\?(?:php)?/.match(data)
+        Language["PHP"]
+      elsif /^\s*#(declare|local|macro|while)\s/.match(data)
+        Language["POV-Ray SDL"]
+      end
+    end
+
    disambiguate ".l" do |data|
      if /\(def(un|macro)\s/.match(data)
        Language["Common Lisp"]
@@ -208,7 +244,7 @@ module Linguist
        Language["MUF"]
      elsif /^\s*;/.match(data)
        Language["M"]
-      elsif /^\s*\(\*/.match(data)
+      elsif /\*\)$/.match(data)
        Language["Mathematica"]
      elsif /^\s*%/.match(data)
        Language["Matlab"]
@@ -217,6 +253,14 @@ module Linguist
      end
    end

+    disambiguate ".md" do |data|
+      if /(^[-a-z0-9=#!\*\[|])|<\//i.match(data) || data.empty?
+        Language["Markdown"]
+      elsif /^(;;|\(define_)/.match(data)
+        Language["GCC machine description"]
+      end
+    end
+
    disambiguate ".ml" do |data|
      if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
        Language["OCaml"]
@@ -313,14 +357,38 @@ module Linguist
      end
    end

+    disambiguate ".props" do |data|
+      if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
+        Language["XML"]
+      elsif /\w+\s*=\s*/i.match(data)
+        Language["INI"]
+      end
+    end
+
    disambiguate ".r" do |data|
      if /\bRebol\b/i.match(data)
        Language["Rebol"]
-      elsif data.include?("<-")
+      elsif /<-|^\s*#/.match(data)
        Language["R"]
      end
    end

+    disambiguate ".rno" do |data|
+      if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
+        Language["RUNOFF"]
+      elsif /^\.\\" /.match(data)
+        Language["Groff"]
+      end
+    end
+
+    disambiguate ".rpy" do |data|
+      if /(^(import|from|class|def)\s)/m.match(data)
+        Language["Python"]
+      else
+        Language["Ren'Py"]
+      end
+    end
+
    disambiguate ".rs" do |data|
      if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
        Language["Rust"]
@@ -338,13 +406,13 @@ module Linguist
    end

    disambiguate ".sql" do |data|
-      if /^\\i\b|AS \$\$|LANGUAGE '+plpgsql'+/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
+      if /^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
        #Postgres
        Language["PLpgSQL"]
      elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data)  || /signal SQLSTATE '[0-9]+'/i.match(data)
        #IBM db2
        Language["SQLPL"]
-      elsif /pragma|\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
+      elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
        #Oracle
        Language["PLSQL"]
      elsif ! /begin|boolean|package|exception/i.match(data)
@@ -352,9 +420,31 @@ module Linguist
        Language["SQL"]
      end
    end
+    
+    disambiguate ".srt" do |data|
+      if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
+        Language["SubRip Text"]
+      end
+    end
+    
+    disambiguate ".t" do |data|
+      if /^\s*%|^\s*var\s+\w+\s*:\s*\w+/.match(data)
+        Language["Turing"]
+      elsif /^\s*use\s+v6\s*;/.match(data)
+        Language["Perl6"]
+      end
+    end
+    
+    disambiguate ".toc" do |data|
+      if /^## |@no-lib-strip@/.match(data)
+        Language["World of Warcraft Addon Data"]
+      elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
+        Language["TeX"]
+      end
+    end

    disambiguate ".ts" do |data|
-      if data.include?("<TS ")
+      if data.include?("<TS")
        Language["XML"]
      else
        Language["TypeScript"]
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -20,10 +20,11 @@ module Linguist
  #
  # Languages are defined in `lib/linguist/languages.yml`.
  class Language
-    @languages       = []
-    @index           = {}
-    @name_index      = {}
-    @alias_index     = {}
+    @languages          = []
+    @index              = {}
+    @name_index         = {}
+    @alias_index        = {}
+    @language_id_index  = {}

    @extension_index          = Hash.new { |h,k| h[k] = [] }
    @interpreter_index        = Hash.new { |h,k| h[k] = [] }
@@ -84,17 +85,11 @@ module Linguist
        @filename_index[filename] << language
      end

+      @language_id_index[language.language_id] = language
+
      language
    end

-    STRATEGIES = [
-      Linguist::Strategy::Modeline,
-      Linguist::Shebang,
-      Linguist::Strategy::Filename,
-      Linguist::Heuristics,
-      Linguist::Classifier
-    ]
-
    # Public: Detects the Language of the blob.
    #
    # blob - an object that includes the Linguist `BlobHelper` interface;
@@ -102,34 +97,8 @@ module Linguist
    #
    # Returns Language or nil.
    def self.detect(blob)
-      # Bail early if the blob is binary or empty.
-      return nil if blob.likely_binary? || blob.binary? || blob.empty?
-
-      Linguist.instrument("linguist.detection", :blob => blob) do
-        # Call each strategy until one candidate is returned.
-        languages = []
-        returning_strategy = nil
-
-        STRATEGIES.each do |strategy|
-          returning_strategy = strategy
-          candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
-            strategy.call(blob, languages)
-          end
-          if candidates.size == 1
-            languages = candidates
-            break
-          elsif candidates.size > 1
-            # More than one candidate was found, pass them to the next strategy.
-            languages = candidates
-          else
-            # No candidates, try the next strategy
-          end
-        end
-
-        Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
-
-        languages.first
-      end
+      warn "[DEPRECATED] `Linguist::Language.detect` is deprecated. Use `Linguist.detect`. #{caller[0]}"
+      Linguist.detect(blob)
    end

    # Public: Get all Languages
@@ -227,6 +196,19 @@ module Linguist
      @interpreter_index[interpreter]
    end

+    # Public: Look up a Language by its language_id.
+    #
+    # language_id - Integer language_id (converted with #to_i before lookup)
+    #
+    # Examples
+    #
+    #   Language.find_by_id(100)
+    #   # => #<Language name="Elixir">
+    #
+    # Returns the matching Language or nil if no Language has that id.
+    def self.find_by_id(language_id)
+      @language_id_index[language_id.to_i]
+    end

    # Public: Look up Language by its name.
    #
@@ -285,6 +267,7 @@ module Linguist
    # Returns an Array of Languages.
    def self.ace_modes
      warn "This method will be deprecated in a future 5.x release. Every language now has an `ace_mode` set."
+      warn caller
      @ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
    end

@@ -318,11 +301,16 @@ module Linguist
      end

      @ace_mode = attributes[:ace_mode]
+      @codemirror_mode = attributes[:codemirror_mode]
+      @codemirror_mime_type = attributes[:codemirror_mime_type]
      @wrap = attributes[:wrap] || false

      # Set legacy search term
      @search_term = attributes[:search_term] || default_alias_name

+      # Set the language_id
+      @language_id = attributes[:language_id]
+
      # Set extensions or default to [].
      @extensions = attributes[:extensions] || []
      @interpreters = attributes[:interpreters]   || []
@@ -385,6 +373,17 @@ module Linguist
    # Returns the name String
    attr_reader :search_term

+    # Public: Get language_id (used in GitHub search)
+    #
+    # Examples
+    #
+    #   # => 1
+    #   # => 2
+    #   # => 3
+    #
+    # Returns the integer language_id
+    attr_reader :language_id
+
    # Public: Get the name of a TextMate-compatible scope
    #
    # Returns the scope
@@ -401,6 +400,31 @@ module Linguist
    # Returns a String name or nil
    attr_reader :ace_mode

+    # Public: Get CodeMirror mode
+    #
+    # Maps to a directory in the `mode/` source code.
+    #   https://github.com/codemirror/CodeMirror/tree/master/mode
+    #
+    # Examples
+    #
+    #  # => nil
+    #  # => "javascript"
+    #  # => "clike"
+    #
+    # Returns a String name or nil
+    attr_reader :codemirror_mode
+
+    # Public: Get CodeMirror MIME type mode
+    #
+    # Examples
+    #
+    #  # => nil
+    #  # => "text/x-javascript"
+    #  # => "text/x-csrc"
+    #
+    # Returns a String name or nil
+    attr_reader :codemirror_mime_type
+
    # Public: Should language lines be wrapped
    #
    # Returns true or false
@@ -577,10 +601,13 @@ module Linguist
      :aliases           => options['aliases'],
      :tm_scope          => options['tm_scope'],
      :ace_mode          => options['ace_mode'],
+      :codemirror_mode   => options['codemirror_mode'],
+      :codemirror_mime_type => options['codemirror_mime_type'],
      :wrap              => options['wrap'],
      :group_name        => options['group'],
      :searchable        => options.fetch('searchable', true),
      :search_term       => options['search_term'],
+      :language_id       => options['language_id'],
      :extensions        => Array(options['extensions']),
      :interpreters      => options['interpreters'].sort,
      :filenames         => options['filenames'],
--- a/lib/linguist/languages.yml
+++ b/lib/linguist/languages.yml
--- a/lib/linguist/lazy_blob.rb
+++ b/lib/linguist/lazy_blob.rb
@@ -28,6 +28,7 @@ module Linguist
      @oid = oid
      @path = path
      @mode = mode
+      @data = nil
    end

    def git_attributes
--- a/lib/linguist/repository.rb
+++ b/lib/linguist/repository.rb
@@ -30,6 +30,9 @@ module Linguist
      @repository = repo
      @commit_oid = commit_oid

+      @old_commit_oid = nil
+      @old_stats = nil
+
      raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
    end

--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -21,7 +21,7 @@ module Linguist
    def self.cache
      @cache ||= begin
        serializer = defined?(Yajl) ? Yajl : YAML
-        serializer.load(File.read(PATH))
+        serializer.load(File.read(PATH, encoding: 'utf-8'))
      end
    end

--- a/lib/linguist/shebang.rb
+++ b/lib/linguist/shebang.rb
@@ -42,10 +42,10 @@ module Linguist
      return unless script

      # "python2.6" -> "python2"
-      script.sub! /(\.\d+)$/, ''
+      script.sub!(/(\.\d+)$/, '')

      # #! perl -> perl
-      script.sub! /^#!\s*/, ''
+      script.sub!(/^#!\s*/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh' &&
--- a/lib/linguist/strategy/modeline.rb
+++ b/lib/linguist/strategy/modeline.rb
@@ -1,19 +1,102 @@
 module Linguist
  module Strategy
    class Modeline
-      EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
+      EMACS_MODELINE = /
+        -\*-
+        (?:
+          # Short form: `-*- ruby -*-`
+          \s* (?= [^:;\s]+ \s* -\*-)
+          |
+          # Longer form: `-*- foo:bar; mode: ruby; -*-`
+          (?:
+            .*?       # Preceding variables: `-*- foo:bar bar:baz;`
+            [;\s]     # Which are delimited by spaces or semicolons
+            |
+            (?<=-\*-) # Not preceded by anything: `-*-mode:ruby-*-`
+          )
+          mode        # Major mode indicator
+          \s*:\s*     # Allow whitespace around colon: `mode : ruby`
+        )
+        ([^:;\s]+)    # Name of mode

-      # First form vim modeline
-      # [text]{white}{vi:|vim:|ex:}[white]{options}
-      # ex: 'vim: syntax=ruby'
-      VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i
+        # Ensure the mode is terminated correctly
+        (?=
+          # Followed by semicolon or whitespace
+          [\s;]
+          |
+          # Touching the ending sequence: `ruby-*-`
+          (?<![-*])   # Don't allow stuff like `ruby--*-` to match; it'll invalidate the mode
+          -\*-        # Emacs has no problems reading `ruby --*-`, however.
+        )
+        .*?           # Anything between a cleanly-terminated mode and the ending -*-
+        -\*-
+      /xi

-      # Second form vim modeline (compatible with some versions of Vi)
-      # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
-      # ex: 'vim set syntax=ruby:'
-      VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i
+      VIM_MODELINE   = /

-      MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]
+        # Start modeline. Could be `vim:`, `vi:` or `ex:`
+        (?:
+          (?:\s|^)
+          vi
+          (?:m[<=>]?\d+|m)? # Version-specific modeline
+          |
+          [\t\x20] # `ex:` requires whitespace, because "ex:" might be short for "example:"
+          ex
+        )
+
+        # If the option-list begins with `set ` or `se `, it indicates an alternative
+        # modeline syntax partly-compatible with older versions of Vi. Here, the colon
+        # serves as a terminator for an option sequence, delimited by whitespace.
+        (?=
+          # So we have to ensure the modeline ends with a colon
+          : (?=\s* set? \s [^\n:]+ :) |
+
+          # Otherwise, it isn't valid syntax and should be ignored
+          : (?!\s* set? \s)
+        )
+
+        # Possible (unrelated) `option=value` pairs to skip past
+        (?:
+          # Option separator. Vim uses whitespace or colons to separate options (except if
+          # the alternate "vim: set " form is used, where only whitespace is used)
+          (?:
+            \s
+            |
+            \s* : \s* # Note that whitespace around colons is accepted too:
+          )           # vim: noai :  ft=ruby:noexpandtab
+
+          # Option's name. All recognised Vim options have an alphanumeric form.
+          \w*
+
+          # Possible value. Not every option takes an argument.
+          (?:
+            # Whitespace between name and value is allowed: `vim: ft   =ruby`
+            \s*=
+
+            # Option's value. Might be blank; `vim: ft= ` says "use no filetype".
+            (?:
+              [^\\\s] # Beware of escaped characters: titlestring=\ ft=ruby
+              |       # will be read by Vim as { titlestring: " ft=ruby" }.
+              \\.
+            )*
+          )?
+        )*
+
+        # The actual filetype declaration
+        [\s:] (?:filetype|ft|syntax) \s*=
+
+        # Language's name
+        (\w+)
+
+        # Ensure it's followed by a legal separator
+        (?=\s|:|$)
+      /xi
+
+      MODELINES = [EMACS_MODELINE, VIM_MODELINE]
+
+      # Scope of the search for modelines
+      # Number of lines to check at the beginning and at the end of the file
+      SEARCH_SCOPE = 5

      # Public: Detects language based on Vim and Emacs modelines
      #
@@ -26,7 +109,9 @@ module Linguist
      # Returns an Array with one Language if the blob has a Vim or Emacs modeline
      # that matches a Language name or alias. Returns an empty array if no match.
      def self.call(blob, _ = nil)
-        Array(Language.find_by_alias(modeline(blob.data)))
+        header = blob.lines.first(SEARCH_SCOPE).join("\n")
+        footer = blob.lines.last(SEARCH_SCOPE).join("\n")
+        Array(Language.find_by_alias(modeline(header + footer)))
      end

      # Public: Get the modeline from the first n-lines of the file
--- a/lib/linguist/vendor.yml
+++ b/lib/linguist/vendor.yml
@@ -15,15 +15,25 @@
 # Dependencies
 - ^[Dd]ependencies/

+# Distributions
+- (^|/)dist/
+
 # C deps
 #  https://github.com/joyent/node
 - ^deps/
 - ^tools/
 - (^|/)configure$
- (^|/)configure.ac$
 - (^|/)config.guess$
 - (^|/)config.sub$

+# stuff autogenerated by autoconf - still C deps
+- (^|/)aclocal.m4
+- (^|/)libtool.m4
+- (^|/)ltoptions.m4
+- (^|/)ltsugar.m4
+- (^|/)ltversion.m4
+- (^|/)lt~obsolete.m4
+
 # Linters
 - cpplint.py

@@ -146,13 +156,19 @@
 - (^|/)tiny_mce([^.]*)\.js$
 - (^|/)tiny_mce/(langs|plugins|themes|utils)

+# Ace Editor
+- (^|/)ace-builds/
+
+# Fontello CSS files
+- (^|/)fontello(.*?)\.css$
+
 # MathJax
 - (^|/)MathJax/

 # Chart.js
 - (^|/)Chart\.js$

-# Codemirror
+# CodeMirror
 - (^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)

 # SyntaxHighlighter - http://alexgorbatchev.com/
@@ -183,6 +199,7 @@

 # django
 - (^|/)admin_media/
+- (^|/)env/

 # Fabric
 - ^fabfile\.py$
@@ -215,6 +232,9 @@
 # Fabric
 - Fabric.framework/

+# BuddyBuild
+- BuddyBuildSDK.framework/
+
 # git config files
 - gitattributes$
 - gitignore$
@@ -302,3 +322,6 @@

 # Android Google APIs
 - (^|/)\.google_apis/
+
+# Jenkins Pipeline
+- ^Jenkinsfile$
--- a/lib/linguist/version.rb
+++ b/lib/linguist/version.rb
@@ -1,3 +1,3 @@
 module Linguist
-  VERSION = "4.7.5"
+  VERSION = "4.8.16"
 end