Merge remote-tracking branch 'origin/master' into interpreters-in-samples

* origin/master:
  byebug requires ruby 2.0
  Remove test for removed extension
  Merge branch 'master' into 1233-local
  Removing pry runtime dependency
  Moving to fixtures
  Language detection test for non-sample files
  Refactoring of Language.detect
  Try shebang detection if the extension is unknown
  Change unknown extension of PHP sample file
This commit is contained in:
Brandon Keepers
2014-11-26 16:25:15 -05:00
7 changed files with 1581 additions and 24 deletions

View File

@@ -2,4 +2,4 @@ source 'https://rubygems.org'
gemspec :name => "github-linguist"
gemspec :name => "github-linguist-grammars"
gem 'test-unit', require: false if RUBY_VERSION >= '2.2'
gem 'byebug'
gem 'byebug' if RUBY_VERSION >= '2.0'

View File

@@ -111,35 +111,47 @@ module Linguist
name += ".script!"
end
# First try to find languages that match based on filename.
# Find languages that match based on filename.
possible_languages = find_by_filename(name)
if possible_languages.length == 1
# Simplest and most common case, we can just return the one match based
# on extension
possible_languages.first
# If there is more than one possible language with that extension (or no
# extension at all, in the case of extensionless scripts), we need to continue
# our detection work
if possible_languages.length > 1
# extension at all, in the case of extensionless scripts), we need to
# continue our detection work
else
# Matches possible_languages.length == 0 || possible_languages.length > 0
data = blob.data
# Check if there's a shebang line and use that as authoritative
if (result = find_by_shebang(data)) && !result.empty?
return result.first
# More than one language with that extension. We need to make a choice.
elsif possible_languages.length > 1
# First try heuristics
possible_language_names = possible_languages.map(&:name)
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
# If there are multiple possible languages returned from heuristics
# then reduce language candidates for Bayesian classifier here.
if heuristic_languages.size > 1
possible_language_names = heuristic_languages.map(&:name)
end
# Check if there's a shebang line and use that as authoritative
if (result = find_by_shebang(data)) && !result.empty?
result.first
# No shebang. Still more work to do. Try to find it with our heuristics.
elsif heuristic_languages.size == 1
heuristic_languages.first
if heuristic_languages.size == 1
return heuristic_languages.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based of the string language name (i.e., first element of `#classify`)
Language[classified[0]]
return Language[classified[0]]
end
end
else
# Simplest and most common case, we can just return the one match based on extension
possible_languages.first
end
end

22
test/fixtures/Python/run_tests.module vendored Normal file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
import sys, os
# Set the current working directory to the directory where this script is located
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
#### Set the name of the application here and moose directory relative to the application
app_name = 'stork'
MODULE_DIR = os.path.abspath('..')
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
#### See if MOOSE_DIR is already in the environment instead
if os.environ.has_key("MOOSE_DIR"):
MOOSE_DIR = os.environ['MOOSE_DIR']
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
import path_tool
path_tool.activate_module('TestHarness')
from TestHarness import TestHarness
# Run the tests!
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)

1505
test/fixtures/Shell/mintleaf.module vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -465,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
assert blob.language, "No language for #{sample[:path]}"
assert_equal sample[:language], blob.language.name, blob.name
end
# Test language detection for files which shouldn't be used as samples
root = File.expand_path('../fixtures', __FILE__)
Dir.entries(root).each do |language|
next unless File.file?(language)
# Each directory contains test files of a language
dirname = File.join(root, language)
Dir.entries(dirname).each do |filename|
next unless File.file?(filename)
# By default blob search the file in the samples;
# thus, we need to give it the absolute path
filepath = File.join(dirname, filename)
blob = blob(filepath)
assert blob.language, "No language for #{filepath}"
assert_equal language, blob.language.name, blob.name
end
end
end
def test_minified_files_not_safe_to_highlight

View File

@@ -221,7 +221,6 @@ class TestLanguage < Test::Unit::TestCase
assert_equal [Language['Clojure']], Language.find_by_filename('riemann.config')
assert_equal [Language['HTML+Django']], Language.find_by_filename('index.jinja')
assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl')
assert_equal [Language['Ant Build System']], Language.find_by_filename('build.ant.xml')
end
def test_find_by_shebang