Merge branch 'master' of https://github.com/github/linguist

Conflicts: grammars.yml
2025-12-08 20:38:47 +00:00 · 2014-11-27 13:47:56 +01:00
parent 02db72515f 9f103abfb5
commit 43ee45d9b6
30 changed files with 1732 additions and 95 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,7 @@ before_install:
  - git fetch origin master:master
  - git fetch origin v2.0.0:v2.0.0
  - git fetch origin test/attributes:test/attributes
+  - git fetch origin test/master:test/master
  - sudo apt-get install libicu-dev -y
 rvm:
  - 1.9.3
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,7 +12,7 @@ This can usually be solved either by adding a new filename or file name extensio

 Assuming your code is being detected as the right language (see above), in most cases this is due to a bug in the language grammar rather than a bug in Linguist. [`grammars.yml`][grammars] lists all the grammars we use for syntax highlighting on github.com. Find the one corresponding to your code's programming language and submit a bug report upstream.

-You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars.
+You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. You can test grammars using [Lightshow](https://lightshow.githubapp.com).

 Once the bug has been fixed upstream, please let us know and we'll pick it up for GitHub.

--- a/1
+++ b/1
@@ -2,3 +2,4 @@ source 'https://rubygems.org'
 gemspec :name => "github-linguist"
 gemspec :name => "github-linguist-grammars"
 gem 'test-unit', require: false if RUBY_VERSION >= '2.2'
+gem 'byebug' if RUBY_VERSION >= '2.0'
--- a/github-linguist.gemspec
+++ b/github-linguist.gemspec
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
  s.add_dependency 'charlock_holmes', '~> 0.7.3'
  s.add_dependency 'escape_utils',    '~> 1.0.1'
  s.add_dependency 'mime-types',      '>= 1.19'
-  s.add_dependency 'rugged',          '~> 0.22.0b1'
+  s.add_dependency 'rugged',          '~> 0.22.0b4'

  s.add_development_dependency 'mocha'
  s.add_development_dependency 'pry'
--- a/grammars.yml
+++ b/grammars.yml
@@ -5,8 +5,6 @@ http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle:
 - source.blitzmax
 http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle:
 - source.cython
-http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle:
- source.fsharp
 http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle:
 - source.forth
 http://svn.textmate.org/trunk/Review/Bundles/Parrot.tmbundle:
@@ -137,6 +135,8 @@ https://github.com/fancy-lang/fancy-tmbundle:
 - source.fancy
 https://github.com/fushnisoft/SublimeClarion:
 - source.clarion
+https://github.com/fsharp/fsharpbinding:
+- source.fsharp
 https://github.com/gingerbeardman/monkey.tmbundle:
 - source.monkey
 https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage:
--- a/lib/linguist/file_blob.rb
+++ b/lib/linguist/file_blob.rb
@@ -57,14 +57,20 @@ module Linguist
    #
    # Returns a String.
    def extension
-      # File.extname returns nil if the filename is an extension.
-      extension = File.extname(name)
-      basename = File.basename(name)
-      # Checks if the filename is an extension.
-      if extension.empty? && basename[0] == "."
-        basename
-      else
-        extension
+      extensions.last || ""
+    end
+
+    # Public: Return an array of the file extensions
+    #
+    #     >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
+    #     => [".html.erb", ".erb"]
+    #
+    # Returns an Array
+    def extensions
+      basename, *segments = File.basename(name).split(".")
+
+      segments.map.with_index do |segment, index|
+        "." + segments[index..-1].join(".")
      end
    end
  end
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -106,40 +106,52 @@ module Linguist
      # A bit of an elegant hack. If the file is executable but extensionless,
      # append a "magic" extension so it can be classified with other
      # languages that have shebang scripts.
-      extension = FileBlob.new(name).extension
-      if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
+      extensions = FileBlob.new(name).extensions
+      if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
        name += ".script!"
      end

-      # First try to find languages that match based on filename.
+      # Find languages that match based on filename.
      possible_languages = find_by_filename(name)

-      # If there is more than one possible language with that extension (or no
-      # extension at all, in the case of extensionless scripts), we need to continue
-      # our detection work
-      if possible_languages.length > 1
-        data = blob.data
-        possible_language_names = possible_languages.map(&:name)
-        heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
+      if possible_languages.length == 1
+        # Simplest and most common case, we can just return the one match based
+        # on extension
+        possible_languages.first

-        if heuristic_languages.size > 1
-          possible_language_names = heuristic_languages.map(&:name)
-        end
+      # If there is more than one possible language with that extension (or no
+      # extension at all, in the case of extensionless scripts), we need to
+      # continue our detection work
+      else
+        # Matches possible_languages.length == 0 || possible_languages.length > 1
+        data = blob.data

        # Check if there's a shebang line and use that as authoritative
        if (result = find_by_shebang(data)) && !result.empty?
-          result.first
-        # No shebang. Still more work to do. Try to find it with our heuristics.
-        elsif heuristic_languages.size == 1
-          heuristic_languages.first
-        # Lastly, fall back to the probabilistic classifier.
-        elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
-          # Return the actual Language object based of the string language name (i.e., first element of `#classify`)
-          Language[classified[0]]
+          return result.first
+
+        # More than one language with that extension. We need to make a choice.
+        elsif possible_languages.length > 1
+
+          # First try heuristics
+
+          possible_language_names = possible_languages.map(&:name)
+          heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
+
+          # If there are multiple possible languages returned from heuristics
+          # then reduce language candidates for Bayesian classifier here.
+          if heuristic_languages.size > 1
+            possible_language_names = heuristic_languages.map(&:name)
+          end
+
+          if heuristic_languages.size == 1
+            return heuristic_languages.first
+          # Lastly, fall back to the probabilistic classifier.
+          elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
+            # Return the actual Language object based on the string language name (i.e., first element of `#classify`)
+            return Language[classified[0]]
+          end
        end
-      else
-        # Simplest and most common case, we can just return the one match based on extension
-        possible_languages.first
      end
    end

@@ -190,8 +202,13 @@ module Linguist
    # Returns all matching Languages or [] if none were found.
    def self.find_by_filename(filename)
      basename = File.basename(filename)
-      extname = FileBlob.new(filename).extension
-      (@filename_index[basename] + find_by_extension(extname)).compact.uniq
+
+      # find the first extension with language definitions
+      extname = FileBlob.new(filename).extensions.detect do |e|
+        !@extension_index[e].empty?
+      end
+
+      (@filename_index[basename] + @extension_index[extname]).compact.uniq
    end

    # Public: Look up Languages by file extension.
--- a/lib/linguist/languages.yml
+++ b/lib/linguist/languages.yml
@@ -470,6 +470,7 @@ CoffeeScript:
  extensions:
  - .coffee
  - ._coffee
+  - .cjsx
  - .cson
  - .iced
  filenames:
@@ -566,6 +567,8 @@ Crystal:
  - .cr
  ace_mode: ruby
  tm_scope: source.ruby
+  interpreters:
+  - crystal

 Cucumber:
  extensions:
@@ -743,6 +746,8 @@ Erlang:
  - .es
  - .escript
  - .hrl
+  interpreters:
+  - escript

 F#:
  type: programming
@@ -938,6 +943,8 @@ Gnuplot:
  - .gnuplot
  - .plot
  - .plt
+  interpreters:
+  - gnuplot

 Go:
  type: programming
@@ -1203,6 +1210,8 @@ Ioke:
  color: "#078193"
  extensions:
  - .ik
+  interpreters:
+  - ioke

 Isabelle:
  type: programming
@@ -1710,6 +1719,8 @@ Nu:
  filenames:
  - Nukefile
  tm_scope: source.scheme
+  interpreters:
+  - nush

 NumPy:
  group: Python
@@ -1896,6 +1907,8 @@ Parrot Assembly:
  - pasm
  extensions:
  - .pasm
+  interpreters:
+  - parrot
  tm_scope: none

 Parrot Internal Representation:
@@ -1906,6 +1919,8 @@ Parrot Internal Representation:
  - pir
  extensions:
  - .pir
+  interpreters:
+  - parrot

 Pascal:
  type: programming
@@ -1948,6 +1963,8 @@ Perl6:
  - .p6m
  - .pl6
  - .pm6
+  interpreters:
+  - perl6
  tm_scope: none

 PigLatin:
@@ -2012,6 +2029,8 @@ Prolog:
  - .ecl
  - .pro
  - .prolog
+  interpreters:
+  - swipl

 Propeller Spin:
  type: programming
@@ -2075,6 +2094,8 @@ Python:
  - wscript
  interpreters:
  - python
+  - python2
+  - python3

 Python traceback:
  type: data
@@ -2095,6 +2116,8 @@ QMake:
  extensions:
  - .pro
  - .pri
+  interpreters:
+  - qmake

 R:
  type: programming
@@ -2249,6 +2272,8 @@ Ruby:
  - .watchr
  interpreters:
  - ruby
+  - macruby
+  - rake
  filenames:
  - .pryrc
  - Appraisals
@@ -2335,6 +2360,8 @@ Scala:
  - .scala
  - .sbt
  - .sc
+  interpreters:
+  - scala

 Scaml:
  group: HTML
--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -52,14 +52,16 @@ module Linguist
              })
            end
          else
+            path = File.join(dirname, filename)
+
            if File.extname(filename) == ""
-              raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
+              raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
            end

            yield({
-              :path     => File.join(dirname, filename),
+              :path     => path,
              :language => category,
-              :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
+              :interpreter => Linguist.interpreter_from_shebang(File.read(path)),
              :extname  => File.extname(filename)
            })
          end
@@ -131,18 +133,19 @@ module Linguist

      script = script == 'env' ? tokens[1] : script

-      # "python2.6" -> "python"
-      if script =~ /((?:\d+\.?)+)/
-        script.sub! $1, ''
-      end
+      # If script has an invalid shebang, we might get here
+      return unless script
+
+      # "python2.6" -> "python2"
+      script.sub! $1, '' if script =~ /(\.\d+)$/

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh' &&
        lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
        script = $1
      end
-
-      script
+      
+      File.basename(script)
    else
      nil
    end
--- a/samples/CoffeeScript/example.cjsx
+++ b/samples/CoffeeScript/example.cjsx
@@ -0,0 +1,40 @@
+###* @cjsx React.DOM ###
+define 'myProject.ReactExampleComponent', [
+  'React'
+  'myProject.ExampleStore'
+  'myProject.ExampleActions'
+  'myProject.ReactExampleTable'
+], (React, ExampleStore, ExampleActions, ReactExampleTable ) ->
+
+  ReactExampleComponent = React.createClass
+    mixins: [ListenMixin]
+
+    getInitialState: ->
+      rows: ExampleStore.getRows()
+      meta: ExampleStore.getMeta()
+
+    componentWillMount: ->
+      @listenTo ExampleStore
+
+    componentDidMount: ->
+      ExampleActions.getExampleData()
+
+    onStoreChange: ->
+      if this.isMounted()
+        @setState
+          rows: ExampleStore.getRows()
+          meta: ExampleStore.getMeta()
+
+    componentWillUnmount: ->
+      @stopListening ExampleStore
+
+    render: ->
+      <div className="page-wrap">
+          <header>
+            <strong> {@state.title} </strong>
+          <header>
+            <ReactExampleTable
+              rows={@state.rows},
+              meta={@state.meta}
+            />
+      </div>
--- a/samples/PHP/drupal.script!
+++ b/samples/PHP/drupal.script!
--- a/samples/Ruby/wrong_shebang.rb
+++ b/samples/Ruby/wrong_shebang.rb
@@ -1,2 +0,0 @@
-#!/usr/bin/env python
-puts "Not Python"
--- a/test/fixtures/Python/run_tests.module
+++ b/test/fixtures/Python/run_tests.module
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+import sys, os
+
+# Set the current working directory to the directory where this script is located
+os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
+
+#### Set the name of the application here and moose directory relative to the application
+app_name = 'stork'
+
+MODULE_DIR = os.path.abspath('..')
+MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
+#### See if MOOSE_DIR is already in the environment instead
+if os.environ.has_key("MOOSE_DIR"):
+  MOOSE_DIR = os.environ['MOOSE_DIR']
+
+sys.path.append(os.path.join(MOOSE_DIR, 'python'))
+import path_tool
+path_tool.activate_module('TestHarness')
+
+from TestHarness import TestHarness
+# Run the tests!
+TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)
--- a/test/fixtures/Shell/mintleaf.module
+++ b/test/fixtures/Shell/mintleaf.module
--- a/test/helper.rb
+++ b/test/helper.rb
@@ -0,0 +1,4 @@
+require "bundler/setup"
+require "test/unit"
+require "mocha/setup"
+require "linguist"
--- a/test/test_blob.rb
+++ b/test/test_blob.rb
@@ -1,9 +1,4 @@
-require 'linguist/file_blob'
-require 'linguist/samples'
-
-require 'test/unit'
-require 'mocha/setup'
-require 'mime/types'
+require_relative "./helper"

 class TestBlob < Test::Unit::TestCase
  include Linguist
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
      assert blob.language, "No language for #{sample[:path]}"
      assert_equal sample[:language], blob.language.name, blob.name
    end
+
+    # Test language detection for files which shouldn't be used as samples
+    root = File.expand_path('../fixtures', __FILE__)
+    Dir.entries(root).each do |language|
+      next unless File.file?(language)
+
+      # Each directory contains test files of a language
+      dirname = File.join(root, language)
+      Dir.entries(dirname).each do |filename|
+        next unless File.file?(filename)
+        
+        # By default, blob searches for the file in the samples;
+        # thus, we need to give it the absolute path.
+        filepath = File.join(dirname, filename)
+        blob = blob(filepath)
+        assert blob.language, "No language for #{filepath}"
+        assert_equal language, blob.language.name, blob.name
+      end
+    end
  end

  def test_minified_files_not_safe_to_highlight
--- a/test/test_classifier.rb
+++ b/test/test_classifier.rb
@@ -1,9 +1,4 @@
-require 'linguist/classifier'
-require 'linguist/language'
-require 'linguist/samples'
-require 'linguist/tokenizer'
-
-require 'test/unit'
+require_relative "./helper"

 class TestClassifier < Test::Unit::TestCase
  include Linguist
--- a/test/test_file_blob.rb
+++ b/test/test_file_blob.rb
@@ -0,0 +1,10 @@
+require 'linguist/file_blob'
+require 'test/unit'
+
+class TestFileBlob < Test::Unit::TestCase
+  def test_extensions
+    assert_equal [".gitignore"], Linguist::FileBlob.new(".gitignore").extensions
+    assert_equal [".xml"],  Linguist::FileBlob.new("build.xml").extensions
+    assert_equal [".html.erb", ".erb"],  Linguist::FileBlob.new("dotted.dir/index.html.erb").extensions
+  end
+end
--- a/test/test_heuristics.rb
+++ b/test/test_heuristics.rb
@@ -1,9 +1,4 @@
-require 'linguist/heuristics'
-require 'linguist/language'
-require 'linguist/samples'
-require 'linguist/file_blob'
-
-require 'test/unit'
+require_relative "./helper"

 class TestHeuristcs < Test::Unit::TestCase
  include Linguist
--- a/test/test_language.rb
+++ b/test/test_language.rb
@@ -1,6 +1,4 @@
-require 'linguist/language'
-require 'test/unit'
-require 'yaml'
+require_relative "./helper"

 class TestLanguage < Test::Unit::TestCase
  include Linguist
--- a/test/test_md5.rb
+++ b/test/test_md5.rb
@@ -1,6 +1,4 @@
-require 'linguist/md5'
-
-require 'test/unit'
+require_relative "./helper"

 class TestMD5 < Test::Unit::TestCase
  include Linguist
--- a/test/test_pedantic.rb
+++ b/test/test_pedantic.rb
@@ -1,5 +1,4 @@
-require 'test/unit'
-require 'yaml'
+require_relative "./helper"

 class TestPedantic < Test::Unit::TestCase
  filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
--- a/test/test_repository.rb
+++ b/test/test_repository.rb
@@ -1,6 +1,4 @@
-require 'linguist/repository'
-require 'linguist/lazy_blob'
-require 'test/unit'
+require_relative "./helper"

 class TestRepository < Test::Unit::TestCase
  def rugged_repository
--- a/test/test_samples.rb
+++ b/test/test_samples.rb
@@ -1,8 +1,5 @@
-require 'linguist/samples'
-require 'linguist/language'
-require 'tempfile'
-require 'yajl'
-require 'test/unit'
+require_relative "./helper"
+require "tempfile"

 class TestSamples < Test::Unit::TestCase
  include Linguist
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
    assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
    assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
    assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
+    assert !data["interpreters"].empty?
  end

-  # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
-  def test_parity
-    extensions = Samples.cache['extnames']
-    languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
-    languages = YAML.load_file(languages_yml)
-
-    languages.each do |name, options|
+  # Check that there aren't samples with extensions or interpreters that
+  # aren't explicitly defined in languages.yml
+  languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
+  YAML.load_file(languages_yml).each do |name, options|
+    define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
      options['extensions'] ||= []
-
-      if extnames = extensions[name]
+      if extnames = Samples.cache['extnames'][name]
        extnames.each do |extname|
          next if extname == '.script!'
          assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
        end
      end
+
+      options['interpreters'] ||= []
+      if interpreters = Samples.cache['interpreters'][name]
+        interpreters.each do |interpreter|
+          # next if extname == '.script!'
+          assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
+        end
+      end
    end
  end

@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
      end
    end
  end
+
+  def test_shebang
+    assert_equal "crystal", Linguist.interpreter_from_shebang("#!/usr/bin/env bin/crystal")
+    assert_equal "python2", Linguist.interpreter_from_shebang("#!/usr/bin/python2.4")
+  end
 end
--- a/test/test_tokenizer.rb
+++ b/test/test_tokenizer.rb
@@ -1,6 +1,4 @@
-require 'linguist/tokenizer'
-
-require 'test/unit'
+require_relative "./helper"

 class TestTokenizer < Test::Unit::TestCase
  include Linguist
--- a/vendor/cache/byebug-3.5.1.gem
+++ b/vendor/cache/byebug-3.5.1.gem
--- a/vendor/cache/columnize-0.8.9.gem
+++ b/vendor/cache/columnize-0.8.9.gem
--- a/vendor/cache/debugger-linecache-1.2.0.gem
+++ b/vendor/cache/debugger-linecache-1.2.0.gem
--- a/vendor/cache/rugged-0.22.0b1.gem
+++ b/vendor/cache/rugged-0.22.0b1.gem
--- a/vendor/cache/rugged-0.22.0b4.gem
+++ b/vendor/cache/rugged-0.22.0b4.gem