mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1538 from github/1233-local
Detection based on the shebang (updated)
This commit is contained in:
@@ -111,35 +111,47 @@ module Linguist
|
||||
name += ".script!"
|
||||
end
|
||||
|
||||
# First try to find languages that match based on filename.
|
||||
# Find languages that match based on filename.
|
||||
possible_languages = find_by_filename(name)
|
||||
|
||||
# If there is more than one possible language with that extension (or no
|
||||
# extension at all, in the case of extensionless scripts), we need to continue
|
||||
# our detection work
|
||||
if possible_languages.length > 1
|
||||
data = blob.data
|
||||
possible_language_names = possible_languages.map(&:name)
|
||||
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||
if possible_languages.length == 1
|
||||
# Simplest and most common case, we can just return the one match based
|
||||
# on extension
|
||||
possible_languages.first
|
||||
|
||||
if heuristic_languages.size > 1
|
||||
possible_language_names = heuristic_languages.map(&:name)
|
||||
end
|
||||
# If there is more than one possible language with that extension (or no
|
||||
# extension at all, in the case of extensionless scripts), we need to
|
||||
# continue our detection work
|
||||
else
|
||||
# Matches possible_languages.length == 0 || possible_languages.length > 1
|
||||
data = blob.data
|
||||
|
||||
# Check if there's a shebang line and use that as authoritative
|
||||
if (result = find_by_shebang(data)) && !result.empty?
|
||||
result.first
|
||||
# No shebang. Still more work to do. Try to find it with our heuristics.
|
||||
elsif heuristic_languages.size == 1
|
||||
heuristic_languages.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
Language[classified[0]]
|
||||
return result.first
|
||||
|
||||
# More than one language with that extension. We need to make a choice.
|
||||
elsif possible_languages.length > 1
|
||||
|
||||
# First try heuristics
|
||||
|
||||
possible_language_names = possible_languages.map(&:name)
|
||||
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||
|
||||
# If there are multiple possible languages returned from heuristics
|
||||
# then reduce language candidates for Bayesian classifier here.
|
||||
if heuristic_languages.size > 1
|
||||
possible_language_names = heuristic_languages.map(&:name)
|
||||
end
|
||||
|
||||
if heuristic_languages.size == 1
|
||||
return heuristic_languages.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
return Language[classified[0]]
|
||||
end
|
||||
end
|
||||
else
|
||||
# Simplest and most common case, we can just return the one match based on extension
|
||||
possible_languages.first
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
22
test/fixtures/Python/run_tests.module
vendored
Normal file
22
test/fixtures/Python/run_tests.module
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, os
|
||||
|
||||
# Set the current working directory to the directory where this script is located
|
||||
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
|
||||
|
||||
#### Set the name of the application here and moose directory relative to the application
|
||||
app_name = 'stork'
|
||||
|
||||
MODULE_DIR = os.path.abspath('..')
|
||||
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
|
||||
#### See if MOOSE_DIR is already in the environment instead
|
||||
if os.environ.has_key("MOOSE_DIR"):
|
||||
MOOSE_DIR = os.environ['MOOSE_DIR']
|
||||
|
||||
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
|
||||
import path_tool
|
||||
path_tool.activate_module('TestHarness')
|
||||
|
||||
from TestHarness import TestHarness
|
||||
# Run the tests!
|
||||
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)
|
||||
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -465,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
|
||||
assert blob.language, "No language for #{sample[:path]}"
|
||||
assert_equal sample[:language], blob.language.name, blob.name
|
||||
end
|
||||
|
||||
# Test language detection for files which shouldn't be used as samples
|
||||
root = File.expand_path('../fixtures', __FILE__)
|
||||
Dir.entries(root).each do |language|
|
||||
next unless File.file?(language)
|
||||
|
||||
# Each directory contains test files of a language
|
||||
dirname = File.join(root, language)
|
||||
Dir.entries(dirname).each do |filename|
|
||||
next unless File.file?(filename)
|
||||
|
||||
# By default, blob searches for the file in the samples;
|
||||
# thus, we need to give it the absolute path
|
||||
filepath = File.join(dirname, filename)
|
||||
blob = blob(filepath)
|
||||
assert blob.language, "No language for #{filepath}"
|
||||
assert_equal language, blob.language.name, blob.name
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_minified_files_not_safe_to_highlight
|
||||
|
||||
Reference in New Issue
Block a user