Merge master

2025-10-29 17:50:22 +00:00 · 2014-11-28 11:04:53 -08:00
parent 4603f3b2e7 b16149d641
commit 26ab33754f
34 changed files with 2071 additions and 97 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,7 @@ before_install:
  - git fetch origin master:master
  - git fetch origin v2.0.0:v2.0.0
  - git fetch origin test/attributes:test/attributes
  - git fetch origin test/master:test/master
  - sudo apt-get install libicu-dev -y
 rvm:
  - 1.9.3
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,7 +12,7 @@ This can usually be solved either by adding a new filename or file name extensio
 Assuming your code is being detected as the right language (see above), in most cases this is due to a bug in the language grammar rather than a bug in Linguist. [`grammars.yml`][grammars] lists all the grammars we use for syntax highlighting on github.com. Find the one corresponding to your code's programming language and submit a bug report upstream.
-You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars.
+You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. You can test grammars using [Lightshow](https://lightshow.githubapp.com).
 Once the bug has been fixed upstream, please let us know and we'll pick it up for GitHub.
--- a/1
+++ b/1
@@ -2,3 +2,4 @@ source 'https://rubygems.org'
 gemspec :name => "github-linguist"
 gemspec :name => "github-linguist-grammars"
 gem 'test-unit', require: false if RUBY_VERSION >= '2.2'
 gem 'byebug' if RUBY_VERSION >= '2.0'
--- a/github-linguist.gemspec
+++ b/github-linguist.gemspec
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
  s.add_dependency 'charlock_holmes', '~> 0.7.3'
  s.add_dependency 'escape_utils',    '~> 1.0.1'
  s.add_dependency 'mime-types',      '>= 1.19'
-  s.add_dependency 'rugged',          '~> 0.22.0b1'
+  s.add_dependency 'rugged',          '~> 0.22.0b4'
  s.add_development_dependency 'mocha'
  s.add_development_dependency 'pry'
--- a/grammars.yml
+++ b/grammars.yml
@@ -5,8 +5,6 @@ http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle:
 - source.blitzmax
 http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle:
 - source.cython
 http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle:
 - source.fsharp
 http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle:
 - source.forth
 http://svn.textmate.org/trunk/Review/Bundles/Parrot.tmbundle:
@@ -135,6 +133,8 @@ https://github.com/euler0/sublime-glsl/raw/master/GLSL.tmLanguage:
 - source.glsl
 https://github.com/fancy-lang/fancy-tmbundle:
 - source.fancy
 https://github.com/fsharp/fsharpbinding:
 - source.fsharp
 https://github.com/gingerbeardman/monkey.tmbundle:
 - source.monkey
 https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage:
--- a/lib/linguist/file_blob.rb
+++ b/lib/linguist/file_blob.rb
@@ -57,14 +57,20 @@ module Linguist
    #
    # Returns a String.
    def extension
-      # File.extname returns nil if the filename is an extension.
+      extensions.last || ""
-      extension = File.extname(name)
+    end
-      basename = File.basename(name)
+
-      # Checks if the filename is an extension.
+    # Public: Return an array of the file extensions
-      if extension.empty? && basename[0] == "."
+    #
-        basename
+    #     >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
-      else
+    #     => [".html.erb", ".erb"]
-        extension
+    #
    # Returns an Array
    def extensions
      # Drop the part before the first dot; the rest are extension segments.
      _stem, *parts = File.basename(name).split(".")

      # For each starting segment, rebuild the suffix running to the end of
      # the name, so the longest compound extension comes first.
      parts.each_index.map { |start| "." + parts[start..-1].join(".") }
    end
  end
--- a/lib/linguist/heuristics.rb
+++ b/lib/linguist/heuristics.rb
@@ -39,6 +39,9 @@ module Linguist
        if languages.all? { |l| ["FORTRAN", "Forth"].include?(l) }
          result = disambiguate_f(data)
        end
        if languages.all? { |l| ["F#", "Forth", "GLSL"].include?(l) }
          result = disambiguate_fs(data)
        end
        return result
      end
    end
@@ -151,6 +154,18 @@ module Linguist
      matches
    end
    # Internal: Choose between Forth, F# and GLSL using line-start markers.
    #
    # data - The String file contents to inspect.
    #
    # The patterns are tried in order (Forth, then F#, then GLSL) and the
    # first one that matches at the start of any line wins.
    #
    # Returns an Array holding the single matching Language, or an empty
    # Array when none of the patterns match.
    def self.disambiguate_fs(data)
      language_name =
        if /^(: |new-device)/.match(data)
          "Forth"
        elsif /^(#light|import|let|module|namespace|open|type)/.match(data)
          "F#"
        elsif /^(#include|#pragma|precision|uniform|varying|void)/.match(data)
          "GLSL"
        end

      language_name ? [Language[language_name]] : []
    end
    # Public: Report whether heuristics are switched on.
    #
    # Returns true when the ACTIVE constant is truthy, false otherwise.
    def self.active?
      ACTIVE ? true : false
    end
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -106,40 +106,52 @@ module Linguist
      # A bit of an elegant hack. If the file is executable but extensionless,
      # append a "magic" extension so it can be classified with other
      # languages that have shebang scripts.
-      extension = FileBlob.new(name).extension
+      extensions = FileBlob.new(name).extensions
-      if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
+      if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
        name += ".script!"
      end
-      # First try to find languages that match based on filename.
+      # Find languages that match based on filename.
      possible_languages = find_by_filename(name)
-      # If there is more than one possible language with that extension (or no
+      if possible_languages.length == 1
-      # extension at all, in the case of extensionless scripts), we need to continue
+        # Simplest and most common case, we can just return the one match based
-      # our detection work
+        # on extension
-      if possible_languages.length > 1
+        possible_languages.first
        data = blob.data
        possible_language_names = possible_languages.map(&:name)
        heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
-        if heuristic_languages.size > 1
+      # If there is more than one possible language with that extension (or no
-          possible_language_names = heuristic_languages.map(&:name)
+      # extension at all, in the case of extensionless scripts), we need to
-        end
+      # continue our detection work
      else
        # Matches possible_languages.length == 0 || possible_languages.length > 1
        data = blob.data
        # Check if there's a shebang line and use that as authoritative
        if (result = find_by_shebang(data)) && !result.empty?
-          result.first
+          return result.first
-        # No shebang. Still more work to do. Try to find it with our heuristics.
+
-        elsif heuristic_languages.size == 1
+        # More than one language with that extension. We need to make a choice.
-          heuristic_languages.first
+        elsif possible_languages.length > 1
-        # Lastly, fall back to the probabilistic classifier.
+
-        elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
+          # First try heuristics
-          # Return the actual Language object based of the string language name (i.e., first element of `#classify`)
+
-          Language[classified[0]]
+          possible_language_names = possible_languages.map(&:name)
          heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
          # If there are multiple possible languages returned from heuristics
          # then reduce language candidates for Bayesian classifier here.
          if heuristic_languages.size > 1
            possible_language_names = heuristic_languages.map(&:name)
          end
          if heuristic_languages.size == 1
            return heuristic_languages.first
          # Lastly, fall back to the probabilistic classifier.
          elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
            # Return the actual Language object based on the string language name (i.e., first element of `#classify`)
            return Language[classified[0]]
          end
        end
      else
        # Simplest and most common case, we can just return the one match based on extension
        possible_languages.first
      end
    end
@@ -190,8 +202,13 @@ module Linguist
    # Returns all matching Languages or [] if none were found.
    def self.find_by_filename(filename)
      basename = File.basename(filename)
-      extname = FileBlob.new(filename).extension
+
-      (@filename_index[basename] + find_by_extension(extname)).compact.uniq
+      # find the first extension with language definitions
      extname = FileBlob.new(filename).extensions.detect do |e|
        !@extension_index[e].empty?
      end
      (@filename_index[basename] + @extension_index[extname]).compact.uniq
    end
    # Public: Look up Languages by file extension.
--- a/lib/linguist/languages.yml
+++ b/lib/linguist/languages.yml
@@ -506,6 +506,7 @@ CoffeeScript:
  extensions:
  - .coffee
  - ._coffee
  - .cjsx
  - .cson
  - .iced
  filenames:
@@ -607,6 +608,8 @@ Crystal:
  - .cr
  ace_mode: ruby
  tm_scope: source.ruby
  interpreters:
  - crystal
 Cucumber:
  extensions:
@@ -806,7 +809,6 @@ Erlang:
  - .es
  - .escript
  - .hrl
  ace_mode: erlang
 F#:
  type: programming
@@ -892,7 +894,6 @@ Forth:
  - .for
  - .forth
  - .frt
  ace_mode: forth
 Frege:
  type: programming
@@ -952,6 +953,7 @@ GLSL:
  - .fp
  - .frag
  - .frg
  - .fs
  - .fshader
  - .geo
  - .geom
@@ -1022,6 +1024,8 @@ Gnuplot:
  - .gnuplot
  - .plot
  - .plt
  interpreters:
  - gnuplot
  ace_mode: none
 Go:
@@ -1313,6 +1317,8 @@ Ioke:
  color: "#078193"
  extensions:
  - .ik
  interpreters:
  - ioke
  ace_mode: none
 Isabelle:
@@ -1868,6 +1874,8 @@ Nu:
  - Nukefile
  tm_scope: source.scheme
  ace_mode: scheme
  interpreters:
  - nush
 NumPy:
  group: Python
@@ -2072,6 +2080,8 @@ Parrot Assembly:
  - pasm
  extensions:
  - .pasm
  interpreters:
  - parrot
  tm_scope: none
  ace_mode: none
@@ -2083,6 +2093,8 @@ Parrot Internal Representation:
  - pir
  extensions:
  - .pir
  interpreters:
  - parrot
  ace_mode: none
 Pascal:
@@ -2127,6 +2139,8 @@ Perl6:
  - .p6m
  - .pl6
  - .pm6
  interpreters:
  - perl6
  tm_scope: none
  ace_mode: perl
@@ -2197,6 +2211,8 @@ Prolog:
  - .ecl
  - .pro
  - .prolog
  interpreters:
  - swipl
  ace_mode: prolog
 Propeller Spin:
@@ -2266,6 +2282,8 @@ Python:
  - wscript
  interpreters:
  - python
  - python2
  - python3
 Python traceback:
  type: data
@@ -2288,6 +2306,8 @@ QMake:
  extensions:
  - .pro
  - .pri
  interpreters:
  - qmake
  ace_mode: none
 R:
@@ -2453,6 +2473,8 @@ Ruby:
  - .watchr
  interpreters:
  - ruby
  - macruby
  - rake
  filenames:
  - .pryrc
  - Appraisals
@@ -2545,6 +2567,8 @@ Scala:
  - .scala
  - .sbt
  - .sc
  interpreters:
  - scala
 Scaml:
  group: HTML
--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -52,14 +52,16 @@ module Linguist
              })
            end
          else
            path = File.join(dirname, filename)
            if File.extname(filename) == ""
-              raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
+              raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
            end
            yield({
-              :path     => File.join(dirname, filename),
+              :path     => path,
              :language => category,
-              :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
+              :interpreter => Linguist.interpreter_from_shebang(File.read(path)),
              :extname  => File.extname(filename)
            })
          end
@@ -131,10 +133,11 @@ module Linguist
      script = script == 'env' ? tokens[1] : script
-      # "python2.6" -> "python"
+      # If script has an invalid shebang, we might get here
-      if script =~ /((?:\d+\.?)+)/
+      return unless script
-        script.sub! $1, ''
+
-      end
+      # "python2.6" -> "python2"
      script.sub! $1, '' if script =~ /(\.\d+)$/
      # Check for multiline shebang hacks that call `exec`
      if script == 'sh' &&
@@ -142,7 +145,7 @@ module Linguist
        script = $1
      end
-      script
+      File.basename(script)
    else
      nil
    end
--- a/samples/CoffeeScript/example.cjsx
+++ b/samples/CoffeeScript/example.cjsx
@@ -0,0 +1,40 @@
 ###* @cjsx React.DOM ###
 define 'myProject.ReactExampleComponent', [
  'React'
  'myProject.ExampleStore'
  'myProject.ExampleActions'
  'myProject.ReactExampleTable'
 ], (React, ExampleStore, ExampleActions, ReactExampleTable ) ->
  ReactExampleComponent = React.createClass
    mixins: [ListenMixin]
    getInitialState: ->
      rows: ExampleStore.getRows()
      meta: ExampleStore.getMeta()
    componentWillMount: ->
      @listenTo ExampleStore
    componentDidMount: ->
      ExampleActions.getExampleData()
    onStoreChange: ->
      if this.isMounted()
        @setState
          rows: ExampleStore.getRows()
          meta: ExampleStore.getMeta()
    componentWillUnmount: ->
      @stopListening ExampleStore
    render: ->
      <div className="page-wrap">
          <header>
            <strong> {@state.title} </strong>
          <header>
            <ReactExampleTable
              rows={@state.rows},
              meta={@state.meta}
            />
      </div>
--- a/samples/F#/sample.fs
+++ b/samples/F#/sample.fs
@@ -0,0 +1,15 @@
 module Sample
 open System
 type Foo =
    {
        Bar : string
    }
 type Baz = interface end
 let Sample1(xs : int list) : string =
    xs
    |> List.map (fun x -> string x)
    |> String.concat ","
--- a/samples/Forth/core.fs
+++ b/samples/Forth/core.fs
@@ -0,0 +1,252 @@
 : immediate   lastxt @ dup c@ negate swap c! ;
 : \   source nip >in ! ; immediate \ Copyright 2004, 2012 Lars Brinkhoff
 : char \ ( "word" -- char )
    bl-word here 1+ c@ ;
 : ahead  here 0 , ;
 : resolve   here swap ! ;
 : '   bl-word here find 0branch [ ahead ] exit [ resolve ] 0 ;
 : postpone-nonimmediate   [ ' literal , ' compile, ] literal , ;
 : create   dovariable_code header, reveal ;
 create postponers
    ' postpone-nonimmediate ,
    ' abort ,
    ' , ,
 : word \ ( char "<chars>string<char>" -- caddr )
    drop bl-word here ;
 : postpone \ ( C: "word" -- )
    bl word find 1+ cells  postponers + @ execute ; immediate
 : unresolved \ ( C: "word" -- orig )
    postpone postpone  postpone ahead ; immediate
 : chars \ ( n1 -- n2 )
    ;
 : else \ ( -- ) ( C: orig1 -- orig2 )
    unresolved branch swap resolve ; immediate
 : if \ ( flag -- ) ( C: -- orig )
    unresolved 0branch ; immediate
 : then \ ( -- ) ( C: orig -- )
    resolve ; immediate
 : [char] \ ( "word" -- )
    char  postpone literal ; immediate
 : (does>)   lastxt @ dodoes_code over >code ! r> swap >does ! ;
 : does>   postpone (does>) ; immediate
 : begin \ ( -- ) ( C: -- dest )
    here ; immediate
 : while \ ( x -- ) ( C: dest -- orig dest )
    unresolved 0branch swap ; immediate
 : repeat \ ( -- ) ( C: orig dest -- )
    postpone branch ,  resolve ; immediate
 : until \ ( x -- ) ( C: dest -- )
    postpone 0branch , ; immediate
 : recurse   lastxt @ compile, ; immediate
 : pad \ ( -- addr )
    here 1024 + ;
 : parse \ ( char "string<char>" -- addr n )
    pad >r  begin
 	source? if <source 2dup <> else 0 0 then
    while
 	r@ c!  r> 1+ >r
    repeat  2drop  pad r> over - ;
 : ( \ ( "string<paren>" -- )
    [ char ) ] literal parse 2drop ; immediate
    \ TODO: If necessary, refill and keep parsing.
 : string, ( addr n -- )
    here over allot align  swap cmove ;
 : (s") ( -- addr n ) ( R: ret1 -- ret2 )
    r> dup @ swap cell+ 2dup + aligned >r swap ;
 create squote   128 allot
 : s" ( "string<quote>" -- addr n )
    state @ if
 	postpone (s")  [char] " parse  dup ,  string,
    else
 	[char] " parse  >r squote r@ cmove  squote r>
    then ; immediate
 : (abort") ( ... addr n -- ) ( R: ... -- )
    cr type cr abort ;
 : abort" ( ... x "string<quote>" -- ) ( R: ... -- )
    postpone if  postpone s"  postpone (abort")  postpone then ; immediate
 \ ----------------------------------------------------------------------
 ( Core words. )
 \ TODO: #
 \ TODO: #>
 \ TODO: #s
 : and  ( x y -- x&y )   nand invert ;
 : *   1 2>r 0 swap begin r@ while
         r> r> swap 2dup dup + 2>r and if swap over + swap then dup +
      repeat r> r> 2drop drop ;
 \ TODO: */mod
 : +loop ( -- ) ( C: nest-sys -- )
    postpone (+loop)  postpone 0branch  ,  postpone unloop ; immediate
 : space   bl emit ;
 : ?.-  dup 0 < if [char] - emit negate then ;
 : digit   [char] 0 + emit ;
 : (.)   base @ /mod  ?dup if recurse then  digit ;
 : ." ( "string<quote>" -- )   postpone s"  postpone type ; immediate
 : . ( x -- )   ?.- (.) space ;
 : postpone-number ( caddr -- )
    0 0 rot count >number dup 0= if
 	2drop nip
 	postpone (literal)  postpone (literal)  postpone ,
 	postpone literal  postpone ,
    else
 	." Undefined: " type cr abort
    then ;
 ' postpone-number  postponers cell+  !
 : / ( x y -- x/y )   /mod nip ;
 : 0< ( n -- flag )   0 < ;
 : 1- ( n -- n-1 )   -1 + ;
 : 2! ( x1 x2 addr -- )   swap over ! cell+ ! ;
 : 2* ( n -- 2n )   dup + ;
 \ Kernel: 2/
 : 2@ ( addr -- x1 x2 )   dup cell+ @ swap @ ;
 \ Kernel: 2drop
 \ Kernel: 2dup
 \ TODO: 2over ( x1 x2 x3 x4 -- x1 x2 x3 x4 x1 x2 )
 \           3 pick 3 pick ;
 \ TODO: 2swap
 \ TODO: <#
 : abs ( n -- |n| )
    dup 0< if negate then ;
 \ TODO: accept
 : c, ( n -- )
    here c!  1 chars allot ;
 : char+ ( n1 -- n2 )
    1+ ;
 : constant   create , does> @ ;
 : decimal ( -- )
    10 base ! ;
 : depth ( -- n )
    data_stack 100 cells +  'SP @  - /cell /  2 - ;
 : do ( n1 n2 -- ) ( R: -- loop-sys ) ( C: -- do-sys )
    postpone 2>r  here ; immediate
 \ TODO: environment?
 \ TODO: evaluate
 \ TODO: fill
 \ TODO: fm/mod )
 \ TODO: hold
 : j ( -- x1 ) ( R: x1 x2 x3 -- x1 x2 x3 )
    'RP @ 3 cells + @ ;
 \ TODO: leave
 : loop ( -- ) ( C: nest-sys -- )
    postpone 1  postpone (+loop)
    postpone 0branch  ,
    postpone unloop ; immediate
 : lshift   begin ?dup while 1- swap dup + swap repeat ;
 : rshift   1 begin over while dup + swap 1- swap repeat nip
           2>r 0 1 begin r@ while
              r> r> 2dup swap dup + 2>r and if swap over + swap then dup +
           repeat r> r> 2drop drop ;
 : max ( x y -- max[x,y] )
    2dup > if drop else nip then ;
 \ Kernel: min
 \ TODO:   mod
 \ TODO:   move
 : (quit) ( R: ... -- )
    return_stack 100 cells + 'RP !
    0 'source-id !  tib ''source !  #tib ''#source !
    postpone [
    begin
 	refill
    while
 	interpret  state @ 0= if ." ok" cr then
    repeat
    bye ;
 ' (quit)  ' quit >body cell+  !
 \ TODO: s>d
 \ TODO: sign
 \ TODO: sm/rem
 : spaces ( n -- )
    0 do space loop ;
 \ TODO: u.
 : signbit ( -- n )   -1 1 rshift invert ;
 : xor ( x y -- x^y )    2dup nand >r r@ nand swap r> nand nand ;
 : u<  ( x y -- flag )  signbit xor swap signbit xor > ;
 \ TODO: um/mod
 : variable ( "word" -- )
    create /cell allot ;
 : ['] \ ( C: "word" -- )
    ' postpone literal ; immediate
--- a/samples/GLSL/recurse1.fs
+++ b/samples/GLSL/recurse1.fs
@@ -0,0 +1,48 @@
 #version 330 core
 // cross-unit recursion
 void main() {}
 // two-level recursion
 float cbar(int);
 void cfoo(float)
 {
 	cbar(2);
 }
 // four-level, out of order
 void CB();
 void CD();
 void CA() { CB(); }
 void CC() { CD(); }
 // high degree
 void CBT();
 void CDT();
 void CAT() { CBT(); CBT(); CBT(); }
 void CCT() { CDT(); CDT(); CBT(); }
 // not recursive
 void norA() {}
 void norB() { norA(); }
 void norC() { norA(); }
 void norD() { norA(); }
 void norE() { norB(); }
 void norF() { norB(); }
 void norG() { norE(); }
 void norH() { norE(); }
 void norI() { norE(); }
 // not recursive, but with a call leading into a cycle if ignoring direction
 void norcA() { }
 void norcB() { norcA(); }
 void norcC() { norcB(); }
 void norcD() { norcC(); norcB(); } // head of cycle
 void norcE() { norcD(); } // lead into cycle
--- a/samples/PHP/drupal.script!
+++ b/samples/PHP/drupal.script!
--- a/samples/Ruby/wrong_shebang.rb
+++ b/samples/Ruby/wrong_shebang.rb
@@ -1,2 +0,0 @@
 #!/usr/bin/env python
 puts "Not Python"
--- a/test/fixtures/Python/run_tests.module
+++ b/test/fixtures/Python/run_tests.module
@@ -0,0 +1,22 @@
 #!/usr/bin/env python
 import sys, os
 # Set the current working directory to the directory where this script is located
 os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
 #### Set the name of the application here and moose directory relative to the application
 app_name = 'stork'
 MODULE_DIR = os.path.abspath('..')
 MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
 #### See if MOOSE_DIR is already in the environment instead
 if os.environ.has_key("MOOSE_DIR"):
  MOOSE_DIR = os.environ['MOOSE_DIR']
 sys.path.append(os.path.join(MOOSE_DIR, 'python'))
 import path_tool
 path_tool.activate_module('TestHarness')
 from TestHarness import TestHarness
 # Run the tests!
 TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)
--- a/test/fixtures/Shell/mintleaf.module
+++ b/test/fixtures/Shell/mintleaf.module
--- a/test/helper.rb
+++ b/test/helper.rb
@@ -0,0 +1,4 @@
 require "bundler/setup"
 require "test/unit"
 require "mocha/setup"
 require "linguist"
--- a/test/test_blob.rb
+++ b/test/test_blob.rb
@@ -1,9 +1,4 @@
-require 'linguist/file_blob'
+require_relative "./helper"
 require 'linguist/samples'
 require 'test/unit'
 require 'mocha/setup'
 require 'mime/types'
 class TestBlob < Test::Unit::TestCase
  include Linguist
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
      assert blob.language, "No language for #{sample[:path]}"
      assert_equal sample[:language], blob.language.name, blob.name
    end
    # Test language detection for files which shouldn't be used as samples
    root = File.expand_path('../fixtures', __FILE__)
    Dir.entries(root).each do |language|
      next unless File.file?(language)
      # Each directory contains test files of a language
      dirname = File.join(root, language)
      Dir.entries(dirname).each do |filename|
        next unless File.file?(filename)
        # By default, `blob` looks for the file in the samples directory;
        # thus, we need to give it the absolute path
        filepath = File.join(dirname, filename)
        blob = blob(filepath)
        assert blob.language, "No language for #{filepath}"
        assert_equal language, blob.language.name, blob.name
      end
    end
  end
  def test_minified_files_not_safe_to_highlight
--- a/test/test_classifier.rb
+++ b/test/test_classifier.rb
@@ -1,9 +1,4 @@
-require 'linguist/classifier'
+require_relative "./helper"
 require 'linguist/language'
 require 'linguist/samples'
 require 'linguist/tokenizer'
 require 'test/unit'
 class TestClassifier < Test::Unit::TestCase
  include Linguist
--- a/test/test_file_blob.rb
+++ b/test/test_file_blob.rb
@@ -0,0 +1,10 @@
 require 'linguist/file_blob'
 require 'test/unit'
 class TestFileBlob < Test::Unit::TestCase
   # Each pair is [path, expected result of FileBlob#extensions]: a dotfile,
   # a single extension, and a compound extension under a dotted directory.
   def test_extensions
     cases = [
       [".gitignore", [".gitignore"]],
       ["build.xml", [".xml"]],
       ["dotted.dir/index.html.erb", [".html.erb", ".erb"]]
     ]

     cases.each do |path, expected|
       assert_equal expected, Linguist::FileBlob.new(path).extensions
     end
   end
 end
--- a/test/test_heuristics.rb
+++ b/test/test_heuristics.rb
@@ -1,9 +1,4 @@
-require 'linguist/heuristics'
+require_relative "./helper"
 require 'linguist/language'
 require 'linguist/samples'
 require 'linguist/file_blob'
 require 'test/unit'
 class TestHeuristcs < Test::Unit::TestCase
  include Linguist
@@ -132,4 +127,14 @@ class TestHeuristcs < Test::Unit::TestCase
    results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"))
    assert_equal Language["Scala"], results.first
  end
  # Every F#, Forth and GLSL fixture must be resolved to its own language by
  # Heuristics.disambiguate_fs.
  def test_fs_by_heuristics
    ["F#", "Forth", "GLSL"].each do |language|
      # The block parameter is named `path` so it does not shadow the
      # `fixture` helper called below.
      all_fixtures(language).each do |path|
        contents = fixture("#{language}/#{File.basename(path)}")
        results = Heuristics.disambiguate_fs(contents)
        assert_equal Language[language], results.first
      end
    end
  end
 end
--- a/test/test_language.rb
+++ b/test/test_language.rb
@@ -1,6 +1,4 @@
-require 'linguist/language'
+require_relative "./helper"
 require 'test/unit'
 require 'yaml'
 class TestLanguage < Test::Unit::TestCase
  include Linguist
--- a/test/test_md5.rb
+++ b/test/test_md5.rb
@@ -1,6 +1,4 @@
-require 'linguist/md5'
+require_relative "./helper"
 require 'test/unit'
 class TestMD5 < Test::Unit::TestCase
  include Linguist
--- a/test/test_pedantic.rb
+++ b/test/test_pedantic.rb
@@ -1,5 +1,4 @@
-require 'test/unit'
+require_relative "./helper"
 require 'yaml'
 class TestPedantic < Test::Unit::TestCase
  filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
--- a/test/test_repository.rb
+++ b/test/test_repository.rb
@@ -1,6 +1,4 @@
-require 'linguist/repository'
+require_relative "./helper"
 require 'linguist/lazy_blob'
 require 'test/unit'
 class TestRepository < Test::Unit::TestCase
  def rugged_repository
--- a/test/test_samples.rb
+++ b/test/test_samples.rb
@@ -1,8 +1,5 @@
-require 'linguist/samples'
+require_relative "./helper"
-require 'linguist/language'
+require "tempfile"
 require 'tempfile'
 require 'yajl'
 require 'test/unit'
 class TestSamples < Test::Unit::TestCase
  include Linguist
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
    assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
    assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
    assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
    assert !data["interpreters"].empty?
  end
-  # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
+  # Check that there aren't samples with extensions or interpreters that
-  def test_parity
+  # aren't explicitly defined in languages.yml
-    extensions = Samples.cache['extnames']
+  languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
-    languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
+  YAML.load_file(languages_yml).each do |name, options|
-    languages = YAML.load_file(languages_yml)
+    define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
    languages.each do |name, options|
      options['extensions'] ||= []
-
+      if extnames = Samples.cache['extnames'][name]
      if extnames = extensions[name]
        extnames.each do |extname|
          next if extname == '.script!'
          assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
        end
      end
      options['interpreters'] ||= []
      if interpreters = Samples.cache['interpreters'][name]
        interpreters.each do |interpreter|
          # next if extname == '.script!'
          assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
        end
      end
    end
  end
@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
      end
    end
  end
  # Each pair is [expected interpreter, shebang line]. The cases are an
  # `env` shebang whose script has a directory prefix, and an interpreter
  # name with a ".4" minor-version suffix.
  def test_shebang
    [
      ["crystal", "#!/usr/bin/env bin/crystal"],
      ["python2", "#!/usr/bin/python2.4"]
    ].each do |expected, shebang|
      assert_equal expected, Linguist.interpreter_from_shebang(shebang)
    end
  end
 end
--- a/test/test_tokenizer.rb
+++ b/test/test_tokenizer.rb
@@ -1,6 +1,4 @@
-require 'linguist/tokenizer'
+require_relative "./helper"
 require 'test/unit'
 class TestTokenizer < Test::Unit::TestCase
  include Linguist
--- a/vendor/cache/byebug-3.5.1.gem
+++ b/vendor/cache/byebug-3.5.1.gem
--- a/vendor/cache/columnize-0.8.9.gem
+++ b/vendor/cache/columnize-0.8.9.gem
--- a/vendor/cache/debugger-linecache-1.2.0.gem
+++ b/vendor/cache/debugger-linecache-1.2.0.gem
--- a/vendor/cache/rugged-0.22.0b1.gem
+++ b/vendor/cache/rugged-0.22.0b1.gem
--- a/vendor/cache/rugged-0.22.0b4.gem
+++ b/vendor/cache/rugged-0.22.0b4.gem