Conflicts:
	grammars.yml
This commit is contained in:
Brahn Partridge
2014-11-27 13:47:56 +01:00
30 changed files with 1732 additions and 95 deletions

View File

@@ -2,6 +2,7 @@ before_install:
- git fetch origin master:master
- git fetch origin v2.0.0:v2.0.0
- git fetch origin test/attributes:test/attributes
- git fetch origin test/master:test/master
- sudo apt-get install libicu-dev -y
rvm:
- 1.9.3

View File

@@ -12,7 +12,7 @@ This can usually be solved either by adding a new filename or file name extensio
Assuming your code is being detected as the right language (see above), in most cases this is due to a bug in the language grammar rather than a bug in Linguist. [`grammars.yml`][grammars] lists all the grammars we use for syntax highlighting on github.com. Find the one corresponding to your code's programming language and submit a bug report upstream.
You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars.
You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. You can test grammars using [Lightshow](https://lightshow.githubapp.com).
Once the bug has been fixed upstream, please let us know and we'll pick it up for GitHub.

View File

@@ -2,3 +2,4 @@ source 'https://rubygems.org'
gemspec :name => "github-linguist"
gemspec :name => "github-linguist-grammars"
gem 'test-unit', require: false if RUBY_VERSION >= '2.2'
gem 'byebug' if RUBY_VERSION >= '2.0'

View File

@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
s.add_dependency 'charlock_holmes', '~> 0.7.3'
s.add_dependency 'escape_utils', '~> 1.0.1'
s.add_dependency 'mime-types', '>= 1.19'
s.add_dependency 'rugged', '~> 0.22.0b1'
s.add_dependency 'rugged', '~> 0.22.0b4'
s.add_development_dependency 'mocha'
s.add_development_dependency 'pry'

View File

@@ -5,8 +5,6 @@ http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle:
- source.blitzmax
http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle:
- source.cython
http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle:
- source.fsharp
http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle:
- source.forth
http://svn.textmate.org/trunk/Review/Bundles/Parrot.tmbundle:
@@ -137,6 +135,8 @@ https://github.com/fancy-lang/fancy-tmbundle:
- source.fancy
https://github.com/fushnisoft/SublimeClarion:
- source.clarion
https://github.com/fsharp/fsharpbinding:
- source.fsharp
https://github.com/gingerbeardman/monkey.tmbundle:
- source.monkey
https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage:

View File

@@ -57,14 +57,20 @@ module Linguist
#
# Returns a String.
def extension
# File.extname returns an empty string if the filename is an extension (e.g. ".gitignore").
extension = File.extname(name)
basename = File.basename(name)
# Checks if the filename is an extension.
if extension.empty? && basename[0] == "."
basename
else
extension
extensions.last || ""
end
# Public: Return an array of the file extensions, longest first.
#
# Every "."-separated segment after the first segment of the basename
# starts a candidate extension, so a dotfile such as ".gitignore" yields
# its whole basename.
#
#     >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
#     => [".html.erb", ".erb"]
#
# Returns an Array of Strings; empty when nothing follows the first segment.
def extensions
  # The leading segment is the bare file name (an empty string for
  # dotfiles); only the segments after it contribute to extensions.
  segments = File.basename(name).split(".").drop(1)

  # For each position, join that segment and everything after it.
  segments.each_index.map do |index|
    "." + segments[index..-1].join(".")
  end
end
end

View File

@@ -106,40 +106,52 @@ module Linguist
# A bit of an elegant hack. If the file is executable but extensionless,
# append a "magic" extension so it can be classified with other
# languages that have shebang scripts.
extension = FileBlob.new(name).extension
if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
extensions = FileBlob.new(name).extensions
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
name += ".script!"
end
# First try to find languages that match based on filename.
# Find languages that match based on filename.
possible_languages = find_by_filename(name)
# If there is more than one possible language with that extension (or no
# extension at all, in the case of extensionless scripts), we need to continue
# our detection work
if possible_languages.length > 1
data = blob.data
possible_language_names = possible_languages.map(&:name)
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
if possible_languages.length == 1
# Simplest and most common case, we can just return the one match based
# on extension
possible_languages.first
if heuristic_languages.size > 1
possible_language_names = heuristic_languages.map(&:name)
end
# If there is more than one possible language with that extension (or no
# extension at all, in the case of extensionless scripts), we need to
# continue our detection work
else
# Matches possible_languages.length == 0 || possible_languages.length > 1
data = blob.data
# Check if there's a shebang line and use that as authoritative
if (result = find_by_shebang(data)) && !result.empty?
result.first
# No shebang. Still more work to do. Try to find it with our heuristics.
elsif heuristic_languages.size == 1
heuristic_languages.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[classified[0]]
return result.first
# More than one language with that extension. We need to make a choice.
elsif possible_languages.length > 1
# First try heuristics
possible_language_names = possible_languages.map(&:name)
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
# If there are multiple possible languages returned from heuristics
# then reduce language candidates for Bayesian classifier here.
if heuristic_languages.size > 1
possible_language_names = heuristic_languages.map(&:name)
end
if heuristic_languages.size == 1
return heuristic_languages.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
return Language[classified[0]]
end
end
else
# Simplest and most common case, we can just return the one match based on extension
possible_languages.first
end
end
@@ -190,8 +202,13 @@ module Linguist
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
basename = File.basename(filename)
extname = FileBlob.new(filename).extension
(@filename_index[basename] + find_by_extension(extname)).compact.uniq
# find the first extension with language definitions
extname = FileBlob.new(filename).extensions.detect do |e|
!@extension_index[e].empty?
end
(@filename_index[basename] + @extension_index[extname]).compact.uniq
end
# Public: Look up Languages by file extension.

View File

@@ -470,6 +470,7 @@ CoffeeScript:
extensions:
- .coffee
- ._coffee
- .cjsx
- .cson
- .iced
filenames:
@@ -566,6 +567,8 @@ Crystal:
- .cr
ace_mode: ruby
tm_scope: source.ruby
interpreters:
- crystal
Cucumber:
extensions:
@@ -743,6 +746,8 @@ Erlang:
- .es
- .escript
- .hrl
interpreters:
- escript
F#:
type: programming
@@ -938,6 +943,8 @@ Gnuplot:
- .gnuplot
- .plot
- .plt
interpreters:
- gnuplot
Go:
type: programming
@@ -1203,6 +1210,8 @@ Ioke:
color: "#078193"
extensions:
- .ik
interpreters:
- ioke
Isabelle:
type: programming
@@ -1710,6 +1719,8 @@ Nu:
filenames:
- Nukefile
tm_scope: source.scheme
interpreters:
- nush
NumPy:
group: Python
@@ -1896,6 +1907,8 @@ Parrot Assembly:
- pasm
extensions:
- .pasm
interpreters:
- parrot
tm_scope: none
Parrot Internal Representation:
@@ -1906,6 +1919,8 @@ Parrot Internal Representation:
- pir
extensions:
- .pir
interpreters:
- parrot
Pascal:
type: programming
@@ -1948,6 +1963,8 @@ Perl6:
- .p6m
- .pl6
- .pm6
interpreters:
- perl6
tm_scope: none
PigLatin:
@@ -2012,6 +2029,8 @@ Prolog:
- .ecl
- .pro
- .prolog
interpreters:
- swipl
Propeller Spin:
type: programming
@@ -2075,6 +2094,8 @@ Python:
- wscript
interpreters:
- python
- python2
- python3
Python traceback:
type: data
@@ -2095,6 +2116,8 @@ QMake:
extensions:
- .pro
- .pri
interpreters:
- qmake
R:
type: programming
@@ -2249,6 +2272,8 @@ Ruby:
- .watchr
interpreters:
- ruby
- macruby
- rake
filenames:
- .pryrc
- Appraisals
@@ -2335,6 +2360,8 @@ Scala:
- .scala
- .sbt
- .sc
interpreters:
- scala
Scaml:
group: HTML

View File

@@ -52,14 +52,16 @@ module Linguist
})
end
else
path = File.join(dirname, filename)
if File.extname(filename) == ""
raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
end
yield({
:path => File.join(dirname, filename),
:path => path,
:language => category,
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
:extname => File.extname(filename)
})
end
@@ -131,18 +133,19 @@ module Linguist
script = script == 'env' ? tokens[1] : script
# "python2.6" -> "python"
if script =~ /((?:\d+\.?)+)/
script.sub! $1, ''
end
# If script has an invalid shebang, we might get here
return unless script
# "python2.6" -> "python2"
script.sub! $1, '' if script =~ /(\.\d+)$/
# Check for multiline shebang hacks that call `exec`
if script == 'sh' &&
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
script = $1
end
script
File.basename(script)
else
nil
end

View File

@@ -0,0 +1,40 @@
###* @cjsx React.DOM ###
define 'myProject.ReactExampleComponent', [
'React'
'myProject.ExampleStore'
'myProject.ExampleActions'
'myProject.ReactExampleTable'
], (React, ExampleStore, ExampleActions, ReactExampleTable ) ->
ReactExampleComponent = React.createClass
mixins: [ListenMixin]
getInitialState: ->
rows: ExampleStore.getRows()
meta: ExampleStore.getMeta()
componentWillMount: ->
@listenTo ExampleStore
componentDidMount: ->
ExampleActions.getExampleData()
onStoreChange: ->
if this.isMounted()
@setState
rows: ExampleStore.getRows()
meta: ExampleStore.getMeta()
componentWillUnmount: ->
@stopListening ExampleStore
render: ->
<div className="page-wrap">
<header>
<strong> {@state.title} </strong>
<header>
<ReactExampleTable
rows={@state.rows},
meta={@state.meta}
/>
</div>

View File

@@ -1,2 +0,0 @@
#!/usr/bin/env python
puts "Not Python"

22
test/fixtures/Python/run_tests.module vendored Normal file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
import sys, os
# Set the current working directory to the directory where this script is located
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
#### Set the name of the application here and moose directory relative to the application
app_name = 'stork'
MODULE_DIR = os.path.abspath('..')
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
#### See if MOOSE_DIR is already in the environment instead
if os.environ.has_key("MOOSE_DIR"):
MOOSE_DIR = os.environ['MOOSE_DIR']
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
import path_tool
path_tool.activate_module('TestHarness')
from TestHarness import TestHarness
# Run the tests!
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)

1505
test/fixtures/Shell/mintleaf.module vendored Normal file

File diff suppressed because it is too large Load Diff

4
test/helper.rb Normal file
View File

@@ -0,0 +1,4 @@
require "bundler/setup"
require "test/unit"
require "mocha/setup"
require "linguist"

View File

@@ -1,9 +1,4 @@
require 'linguist/file_blob'
require 'linguist/samples'
require 'test/unit'
require 'mocha/setup'
require 'mime/types'
require_relative "./helper"
class TestBlob < Test::Unit::TestCase
include Linguist
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
assert blob.language, "No language for #{sample[:path]}"
assert_equal sample[:language], blob.language.name, blob.name
end
# Test language detection for files which shouldn't be used as samples
root = File.expand_path('../fixtures', __FILE__)
Dir.entries(root).each do |language|
next unless File.file?(language)
# Each directory contains test files of a language
dirname = File.join(root, language)
Dir.entries(dirname).each do |filename|
next unless File.file?(filename)
# By default, blob searches for the file in the samples;
# thus, we need to give it the absolute path
filepath = File.join(dirname, filename)
blob = blob(filepath)
assert blob.language, "No language for #{filepath}"
assert_equal language, blob.language.name, blob.name
end
end
end
def test_minified_files_not_safe_to_highlight

View File

@@ -1,9 +1,4 @@
require 'linguist/classifier'
require 'linguist/language'
require 'linguist/samples'
require 'linguist/tokenizer'
require 'test/unit'
require_relative "./helper"
class TestClassifier < Test::Unit::TestCase
include Linguist

10
test/test_file_blob.rb Normal file
View File

@@ -0,0 +1,10 @@
require 'linguist/file_blob'
require 'test/unit'
# Exercises Linguist::FileBlob#extensions against representative paths.
class TestFileBlob < Test::Unit::TestCase
  def test_extensions
    # path => extensions expected for its basename, longest first
    expectations = {
      ".gitignore"                => [".gitignore"],
      "build.xml"                 => [".xml"],
      "dotted.dir/index.html.erb" => [".html.erb", ".erb"]
    }

    expectations.each do |path, expected|
      assert_equal expected, Linguist::FileBlob.new(path).extensions
    end
  end
end

View File

@@ -1,9 +1,4 @@
require 'linguist/heuristics'
require 'linguist/language'
require 'linguist/samples'
require 'linguist/file_blob'
require 'test/unit'
require_relative "./helper"
class TestHeuristcs < Test::Unit::TestCase
include Linguist

View File

@@ -1,6 +1,4 @@
require 'linguist/language'
require 'test/unit'
require 'yaml'
require_relative "./helper"
class TestLanguage < Test::Unit::TestCase
include Linguist

View File

@@ -1,6 +1,4 @@
require 'linguist/md5'
require 'test/unit'
require_relative "./helper"
class TestMD5 < Test::Unit::TestCase
include Linguist

View File

@@ -1,5 +1,4 @@
require 'test/unit'
require 'yaml'
require_relative "./helper"
class TestPedantic < Test::Unit::TestCase
filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)

View File

@@ -1,6 +1,4 @@
require 'linguist/repository'
require 'linguist/lazy_blob'
require 'test/unit'
require_relative "./helper"
class TestRepository < Test::Unit::TestCase
def rugged_repository

View File

@@ -1,8 +1,5 @@
require 'linguist/samples'
require 'linguist/language'
require 'tempfile'
require 'yajl'
require 'test/unit'
require_relative "./helper"
require "tempfile"
class TestSamples < Test::Unit::TestCase
include Linguist
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
assert !data["interpreters"].empty?
end
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
def test_parity
extensions = Samples.cache['extnames']
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
languages = YAML.load_file(languages_yml)
languages.each do |name, options|
# Check that there aren't samples with extensions or interpreters that
# aren't explicitly defined in languages.yml
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
YAML.load_file(languages_yml).each do |name, options|
define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
options['extensions'] ||= []
if extnames = extensions[name]
if extnames = Samples.cache['extnames'][name]
extnames.each do |extname|
next if extname == '.script!'
assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
end
end
options['interpreters'] ||= []
if interpreters = Samples.cache['interpreters'][name]
interpreters.each do |interpreter|
# next if extname == '.script!'
assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
end
end
end
end
@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
end
end
end
# Checks the interpreter name extracted from a shebang line.
def test_shebang
  # shebang line => interpreter name expected back
  cases = {
    "#!/usr/bin/env bin/crystal" => "crystal", # directory prefix is dropped
    "#!/usr/bin/python2.4"       => "python2"  # trailing ".4" is dropped
  }

  cases.each do |shebang, interpreter|
    assert_equal interpreter, Linguist.interpreter_from_shebang(shebang)
  end
end
end

View File

@@ -1,6 +1,4 @@
require 'linguist/tokenizer'
require 'test/unit'
require_relative "./helper"
class TestTokenizer < Test::Unit::TestCase
include Linguist

BIN
vendor/cache/byebug-3.5.1.gem vendored Normal file

Binary file not shown.

BIN
vendor/cache/columnize-0.8.9.gem vendored Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
vendor/cache/rugged-0.22.0b4.gem vendored Normal file

Binary file not shown.