mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1787 from github/move-shebang
Move shebang (updated)
This commit is contained in:
@@ -4,4 +4,5 @@ require 'linguist/heuristics'
|
||||
require 'linguist/language'
|
||||
require 'linguist/repository'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/shebang'
|
||||
require 'linguist/version'
|
||||
|
||||
@@ -11,7 +11,7 @@ require 'linguist/samples'
|
||||
require 'linguist/file_blob'
|
||||
require 'linguist/blob_helper'
|
||||
require 'linguist/strategy/filename'
|
||||
require 'linguist/strategy/shebang'
|
||||
require 'linguist/shebang'
|
||||
|
||||
module Linguist
|
||||
# Language names that are recognizable by GitHub. Defined languages
|
||||
@@ -95,7 +95,7 @@ module Linguist
|
||||
|
||||
STRATEGIES = [
|
||||
Linguist::Strategy::Filename,
|
||||
Linguist::Strategy::Shebang,
|
||||
Linguist::Shebang,
|
||||
Linguist::Heuristics,
|
||||
Linguist::Classifier
|
||||
]
|
||||
@@ -199,20 +199,26 @@ module Linguist
|
||||
@extension_index[extname]
|
||||
end
|
||||
|
||||
# Public: Look up Languages by shebang line.
|
||||
# DEPRECATED
|
||||
def self.find_by_shebang(data)
|
||||
@interpreter_index[Shebang.interpreter(data)]
|
||||
end
|
||||
|
||||
# Public: Look up Languages by interpreter.
|
||||
#
|
||||
# data - Array of tokens or String data to analyze.
|
||||
# interpreter - String of interpreter name
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# Language.find_by_shebang("#!/bin/bash\ndate;")
|
||||
# Language.find_by_interpreter("bash")
|
||||
# # => [#<Language name="Bash">]
|
||||
#
|
||||
# Returns the matching Language
|
||||
def self.find_by_shebang(data)
|
||||
@interpreter_index[Linguist.interpreter_from_shebang(data)]
|
||||
def self.find_by_interpreter(interpreter)
|
||||
@interpreter_index[interpreter]
|
||||
end
|
||||
|
||||
|
||||
# Public: Look up Language by its name or lexer.
|
||||
#
|
||||
# name - The String name of the Language
|
||||
|
||||
@@ -6,6 +6,7 @@ end
|
||||
|
||||
require 'linguist/md5'
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/shebang'
|
||||
|
||||
module Linguist
|
||||
# Model for accessing classifier training data.
|
||||
@@ -61,7 +62,7 @@ module Linguist
|
||||
yield({
|
||||
:path => path,
|
||||
:language => category,
|
||||
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
|
||||
:interpreter => Shebang.interpreter(File.read(path)),
|
||||
:extname => File.extname(filename)
|
||||
})
|
||||
end
|
||||
@@ -114,41 +115,4 @@ module Linguist
|
||||
db
|
||||
end
|
||||
end
|
||||
|
||||
# Used to retrieve the interpreter from the shebang line of a file's
# data.
#
# data - String contents of the file. Only the first line is treated as the
#        shebang; the first five lines are scanned for an `exec` hack when
#        the interpreter is `sh`.
#
# Examples
#
#   interpreter_from_shebang("#!/usr/bin/env ruby\n")  # => "ruby"
#   interpreter_from_shebang("#!/usr/bin/python2.4")   # => "python2"
#   interpreter_from_shebang("#!")                     # => nil
#
# Returns the interpreter name as a String, or nil when the first line is
# not a shebang or names no interpreter.
def self.interpreter_from_shebang(data)
  lines = data.lines.to_a
  first_line = lines.first
  return unless first_line && first_line.start_with?('#!')

  # Drop the "#!" marker together with any whitespace that follows it, so
  # "#! perl", "#!  perl" and "#!perl" are all tokenized the same way.
  tokens = first_line.sub(/\A#!\s*/, '').split(' ')

  # A bare "#!" names no interpreter.
  return if tokens.empty?

  script = tokens.first.split('/').last
  script = tokens[1] if script == 'env'

  # If script has an invalid shebang (e.g. "#!/" or "env" with no
  # argument), we might get here
  return unless script

  # "python2.6" -> "python2". Anchored regexp rather than a literal
  # substitution of the captured text, which would strip the first
  # occurrence of that text instead of the trailing one.
  script = script.sub(/\.\d+\z/, '')

  # Check for multiline shebang hacks that call `exec`
  if script == 'sh'
    lines.first(5).each do |line|
      next unless (exec_call = line.match(/exec (\w+).+\$0.+\$@/))
      script = exec_call[1]
      break
    end
  end

  File.basename(script)
end
|
||||
|
||||
end
|
||||
|
||||
44
lib/linguist/shebang.rb
Normal file
44
lib/linguist/shebang.rb
Normal file
@@ -0,0 +1,44 @@
|
||||
module Linguist
  class Shebang
    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob.
    # _    - Ignored, optional second argument.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns an Array with one Language if the blob has a shebang with a valid
    # interpreter, or empty if there is no shebang.
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - String contents to inspect. Only the first line is treated as
    #        the shebang; the first five lines are scanned for an `exec`
    #        hack when the interpreter is `sh`.
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env ruby\n")  # => "ruby"
    #   Shebang.interpreter("#!/usr/bin/python2.7")   # => "python2"
    #   Shebang.interpreter("#!")                     # => nil
    #
    # Returns a String or nil
    def self.interpreter(data)
      lines = data.lines
      return unless (match = /^#! ?(.*)$/.match(lines.first))

      tokens = match[1].split(' ')

      # A bare "#!" names no interpreter. Without this guard tokens.first is
      # nil and the split below raises NoMethodError.
      return if tokens.empty?

      script = tokens.first.split('/').last
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang (e.g. "#!/" or "env" with no
      # argument), we might get here
      return unless script

      # "python2.6" -> "python2". Anchored regexp rather than a literal
      # substitution of the captured text, which would strip the first
      # occurrence of that text instead of the trailing one.
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        lines.first(5).each do |line|
          next unless (exec_call = line.match(/exec (\w+).+\$0.+\$@/))
          script = exec_call[1]
          break
        end
      end

      File.basename(script)
    end
  end
end
|
||||
@@ -1,10 +0,0 @@
|
||||
module Linguist
  module Strategy
    # Check if there's a shebang line and use that as authoritative
    class Shebang
      # Public: Look up languages from the blob's shebang line.
      #
      # blob - An object that responds to #data.
      # _    - Ignored. Optional, so the strategy can be invoked with just a
      #        blob, the same way Linguist::Shebang.call can.
      #
      # Returns whatever Language.find_by_shebang returns for the blob's data.
      def self.call(blob, _ = nil)
        Language.find_by_shebang(blob.data)
      end
    end
  end
end
|
||||
@@ -223,34 +223,21 @@ class TestLanguage < Test::Unit::TestCase
|
||||
assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl')
|
||||
end
|
||||
|
||||
def test_find_by_shebang
|
||||
assert_equal 'ruby', Linguist.interpreter_from_shebang("#!/usr/bin/ruby\n# baz")
|
||||
{ [] => ["",
|
||||
"foo",
|
||||
"#bar",
|
||||
"#baz",
|
||||
"///",
|
||||
"\n\n\n\n\n",
|
||||
" #!/usr/sbin/ruby",
|
||||
"\n#!/usr/sbin/ruby"],
|
||||
['Ruby'] => ["#!/usr/bin/env ruby\n# baz",
|
||||
"#!/usr/sbin/ruby\n# bar",
|
||||
"#!/usr/bin/ruby\n# foo",
|
||||
"#!/usr/sbin/ruby",
|
||||
"#!/usr/sbin/ruby foo bar baz\n"],
|
||||
['R'] => ["#!/usr/bin/env Rscript\n# example R script\n#\n"],
|
||||
['Shell'] => ["#!/usr/bin/bash\n", "#!/bin/sh"],
|
||||
['Python'] => ["#!/bin/python\n# foo\n# bar\n# baz",
|
||||
"#!/usr/bin/python2.7\n\n\n\n",
|
||||
"#!/usr/bin/python3\n\n\n\n"],
|
||||
["Common Lisp"] => ["#!/usr/bin/sbcl --script\n\n"]
|
||||
}.each do |languages, bodies|
|
||||
bodies.each do |body|
|
||||
assert_equal([body, languages.map{|l| Language[l]}],
|
||||
[body, Language.find_by_shebang(body)])
|
||||
def test_find_by_interpreter
|
||||
{
|
||||
"ruby" => "Ruby",
|
||||
"Rscript" => "R",
|
||||
"sh" => "Shell",
|
||||
"bash" => "Shell",
|
||||
"python" => "Python",
|
||||
"python2" => "Python",
|
||||
"python3" => "Python",
|
||||
"sbcl" => "Common Lisp"
|
||||
}.each do |interpreter, language|
|
||||
assert_equal [Language[language]], Language.find_by_interpreter(interpreter)
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
assert_equal [], Language.find_by_interpreter(nil)
|
||||
end
|
||||
|
||||
def test_find
|
||||
|
||||
@@ -82,9 +82,4 @@ class TestSamples < Test::Unit::TestCase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_shebang
|
||||
assert_equal "crystal", Linguist.interpreter_from_shebang("#!/usr/bin/env bin/crystal")
|
||||
assert_equal "python2", Linguist.interpreter_from_shebang("#!/usr/bin/python2.4")
|
||||
end
|
||||
end
|
||||
|
||||
38
test/test_shebang.rb
Normal file
38
test/test_shebang.rb
Normal file
@@ -0,0 +1,38 @@
|
||||
require_relative "./helper"
|
||||
|
||||
class TestShebang < Test::Unit::TestCase
  include Linguist

  # Asserts that Shebang.interpreter returns `interpreter` for `body`.
  def assert_interpreter(interpreter, body)
    assert_equal interpreter, Shebang.interpreter(body)
  end

  # Runs every [expected interpreter, file body] pair through
  # assert_interpreter, in order. A nil expectation means no interpreter
  # should be detected.
  def test_shebangs
    cases = [
      # No shebang on the first line
      [nil, ""],
      [nil, "foo"],
      [nil, "#bar"],
      [nil, "#baz"],
      [nil, "///"],
      [nil, "\n\n\n\n\n"],
      [nil, " #!/usr/sbin/ruby"],
      [nil, "\n#!/usr/sbin/ruby"],

      # Direct interpreter paths, with and without arguments
      ["ruby", "#!/usr/sbin/ruby\n# bar"],
      ["ruby", "#!/usr/bin/ruby\n# foo"],
      ["ruby", "#!/usr/sbin/ruby"],
      ["ruby", "#!/usr/sbin/ruby foo bar baz\n"],

      # Interpreters launched through env
      ["Rscript", "#!/usr/bin/env Rscript\n# example R script\n#\n"],
      ["crystal", "#!/usr/bin/env bin/crystal"],
      ["ruby", "#!/usr/bin/env ruby\n# baz"],

      # Other interpreters, version suffixes and spacing
      ["bash", "#!/usr/bin/bash\n"],
      ["sh", "#!/bin/sh"],
      ["python", "#!/bin/python\n# foo\n# bar\n# baz"],
      ["python2", "#!/usr/bin/python2.7\n\n\n\n"],
      ["python3", "#!/usr/bin/python3\n\n\n\n"],
      ["sbcl", "#!/usr/bin/sbcl --script\n\n"],
      ["perl", "#! perl"]
    ]

    cases.each do |expected, body|
      assert_interpreter expected, body
    end
  end
end
|
||||
Reference in New Issue
Block a user