mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1787 from github/move-shebang
Move shebang (updated)
This commit is contained in:
@@ -4,4 +4,5 @@ require 'linguist/heuristics'
|
|||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
require 'linguist/repository'
|
require 'linguist/repository'
|
||||||
require 'linguist/samples'
|
require 'linguist/samples'
|
||||||
|
require 'linguist/shebang'
|
||||||
require 'linguist/version'
|
require 'linguist/version'
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ require 'linguist/samples'
|
|||||||
require 'linguist/file_blob'
|
require 'linguist/file_blob'
|
||||||
require 'linguist/blob_helper'
|
require 'linguist/blob_helper'
|
||||||
require 'linguist/strategy/filename'
|
require 'linguist/strategy/filename'
|
||||||
require 'linguist/strategy/shebang'
|
require 'linguist/shebang'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
# Language names that are recognizable by GitHub. Defined languages
|
# Language names that are recognizable by GitHub. Defined languages
|
||||||
@@ -95,7 +95,7 @@ module Linguist
|
|||||||
|
|
||||||
STRATEGIES = [
|
STRATEGIES = [
|
||||||
Linguist::Strategy::Filename,
|
Linguist::Strategy::Filename,
|
||||||
Linguist::Strategy::Shebang,
|
Linguist::Shebang,
|
||||||
Linguist::Heuristics,
|
Linguist::Heuristics,
|
||||||
Linguist::Classifier
|
Linguist::Classifier
|
||||||
]
|
]
|
||||||
@@ -199,20 +199,26 @@ module Linguist
|
|||||||
@extension_index[extname]
|
@extension_index[extname]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Public: Look up Languages by shebang line.
|
# DEPRECATED
|
||||||
|
def self.find_by_shebang(data)
|
||||||
|
@interpreter_index[Shebang.interpreter(data)]
|
||||||
|
end
|
||||||
|
|
||||||
|
# Public: Look up Languages by interpreter.
|
||||||
#
|
#
|
||||||
# data - Array of tokens or String data to analyze.
|
# interpreter - String of interpreter name
|
||||||
#
|
#
|
||||||
# Examples
|
# Examples
|
||||||
#
|
#
|
||||||
# Language.find_by_shebang("#!/bin/bash\ndate;")
|
# Language.find_by_interpreter("bash")
|
||||||
# # => [#<Language name="Bash">]
|
# # => [#<Language name="Bash">]
|
||||||
#
|
#
|
||||||
# Returns the matching Language
|
# Returns the matching Language
|
||||||
def self.find_by_shebang(data)
|
def self.find_by_interpreter(interpreter)
|
||||||
@interpreter_index[Linguist.interpreter_from_shebang(data)]
|
@interpreter_index[interpreter]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Public: Look up Language by its name or lexer.
|
# Public: Look up Language by its name or lexer.
|
||||||
#
|
#
|
||||||
# name - The String name of the Language
|
# name - The String name of the Language
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ end
|
|||||||
|
|
||||||
require 'linguist/md5'
|
require 'linguist/md5'
|
||||||
require 'linguist/classifier'
|
require 'linguist/classifier'
|
||||||
|
require 'linguist/shebang'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
# Model for accessing classifier training data.
|
# Model for accessing classifier training data.
|
||||||
@@ -61,7 +62,7 @@ module Linguist
|
|||||||
yield({
|
yield({
|
||||||
:path => path,
|
:path => path,
|
||||||
:language => category,
|
:language => category,
|
||||||
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
|
:interpreter => Shebang.interpreter(File.read(path)),
|
||||||
:extname => File.extname(filename)
|
:extname => File.extname(filename)
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
@@ -114,41 +115,4 @@ module Linguist
|
|||||||
db
|
db
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Used to retrieve the interpreter from the shebang line of a file's
|
|
||||||
# data.
|
|
||||||
def self.interpreter_from_shebang(data)
|
|
||||||
lines = data.lines.to_a
|
|
||||||
|
|
||||||
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
|
||||||
bang.sub!(/^#! /, '#!')
|
|
||||||
tokens = bang.split(' ')
|
|
||||||
pieces = tokens.first.split('/')
|
|
||||||
|
|
||||||
if pieces.size > 1
|
|
||||||
script = pieces.last
|
|
||||||
else
|
|
||||||
script = pieces.first.sub('#!', '')
|
|
||||||
end
|
|
||||||
|
|
||||||
script = script == 'env' ? tokens[1] : script
|
|
||||||
|
|
||||||
# If script has an invalid shebang, we might get here
|
|
||||||
return unless script
|
|
||||||
|
|
||||||
# "python2.6" -> "python2"
|
|
||||||
script.sub! $1, '' if script =~ /(\.\d+)$/
|
|
||||||
|
|
||||||
# Check for multiline shebang hacks that call `exec`
|
|
||||||
if script == 'sh' &&
|
|
||||||
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
|
||||||
script = $1
|
|
||||||
end
|
|
||||||
|
|
||||||
File.basename(script)
|
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|||||||
44
lib/linguist/shebang.rb
Normal file
44
lib/linguist/shebang.rb
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
module Linguist
  # Detects the language of a blob from the interpreter named on its
  # shebang ("#!") line.
  class Shebang
    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob (it must respond to #data).
    # _    - Ignored. Accepted so this class can be invoked with the same
    #        two-argument call the other detection strategies receive.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns an Array with one Language if the blob has a shebang with a valid
    # interpreter, or empty if there is no shebang.
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - String contents of the file; only the first line is inspected
    #        for the shebang, and the first five lines for the `exec` hack.
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env ruby\nputs 1")
    #   # => "ruby"
    #
    #   Shebang.interpreter("#!/usr/bin/python2.7\n")
    #   # => "python2"
    #
    #   Shebang.interpreter("#!")
    #   # => nil
    #
    # Returns a String or nil
    def self.interpreter(data)
      lines = data.lines
      first_line = lines.first

      # No content at all, or the first line is not a shebang.
      return unless first_line
      return unless (match = /\A#! ?(.*)$/.match(first_line))

      tokens = match[1].split(' ')

      # A bare "#!" (optionally followed by whitespace) names no interpreter.
      return if tokens.empty?

      script = tokens.first.split('/').last

      # "#!/usr/bin/env ruby" -> the interpreter is env's first argument.
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang (e.g. "#!/" or "#!/usr/bin/env"
      # with no argument), we might get here
      return unless script

      # "python2.6" -> "python2". Only the trailing ".<digits>" suffix is
      # removed, and the token is not mutated in place.
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`: a "sh" script
      # whose first lines re-exec the real interpreter with "$0" and "$@".
      if script == 'sh'
        exec_hack = /exec (\w+).+\$0.+\$@/
        hack_line = lines.first(5).find { |line| line =~ exec_hack }
        script = hack_line[exec_hack, 1] if hack_line
      end

      File.basename(script)
    end
  end
end
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
module Linguist
|
|
||||||
module Strategy
|
|
||||||
# Check if there's a shebang line and use that as authoritative
|
|
||||||
class Shebang
|
|
||||||
def self.call(blob, _)
|
|
||||||
Language.find_by_shebang(blob.data)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@@ -223,34 +223,21 @@ class TestLanguage < Test::Unit::TestCase
|
|||||||
assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl')
|
assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl')
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_find_by_shebang
|
def test_find_by_interpreter
|
||||||
assert_equal 'ruby', Linguist.interpreter_from_shebang("#!/usr/bin/ruby\n# baz")
|
{
|
||||||
{ [] => ["",
|
"ruby" => "Ruby",
|
||||||
"foo",
|
"Rscript" => "R",
|
||||||
"#bar",
|
"sh" => "Shell",
|
||||||
"#baz",
|
"bash" => "Shell",
|
||||||
"///",
|
"python" => "Python",
|
||||||
"\n\n\n\n\n",
|
"python2" => "Python",
|
||||||
" #!/usr/sbin/ruby",
|
"python3" => "Python",
|
||||||
"\n#!/usr/sbin/ruby"],
|
"sbcl" => "Common Lisp"
|
||||||
['Ruby'] => ["#!/usr/bin/env ruby\n# baz",
|
}.each do |interpreter, language|
|
||||||
"#!/usr/sbin/ruby\n# bar",
|
assert_equal [Language[language]], Language.find_by_interpreter(interpreter)
|
||||||
"#!/usr/bin/ruby\n# foo",
|
|
||||||
"#!/usr/sbin/ruby",
|
|
||||||
"#!/usr/sbin/ruby foo bar baz\n"],
|
|
||||||
['R'] => ["#!/usr/bin/env Rscript\n# example R script\n#\n"],
|
|
||||||
['Shell'] => ["#!/usr/bin/bash\n", "#!/bin/sh"],
|
|
||||||
['Python'] => ["#!/bin/python\n# foo\n# bar\n# baz",
|
|
||||||
"#!/usr/bin/python2.7\n\n\n\n",
|
|
||||||
"#!/usr/bin/python3\n\n\n\n"],
|
|
||||||
["Common Lisp"] => ["#!/usr/bin/sbcl --script\n\n"]
|
|
||||||
}.each do |languages, bodies|
|
|
||||||
bodies.each do |body|
|
|
||||||
assert_equal([body, languages.map{|l| Language[l]}],
|
|
||||||
[body, Language.find_by_shebang(body)])
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
assert_equal [], Language.find_by_interpreter(nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_find
|
def test_find
|
||||||
|
|||||||
@@ -82,9 +82,4 @@ class TestSamples < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_shebang
|
|
||||||
assert_equal "crystal", Linguist.interpreter_from_shebang("#!/usr/bin/env bin/crystal")
|
|
||||||
assert_equal "python2", Linguist.interpreter_from_shebang("#!/usr/bin/python2.4")
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|||||||
38
test/test_shebang.rb
Normal file
38
test/test_shebang.rb
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
require_relative "./helper"
|
||||||
|
|
||||||
|
# Exercises Shebang.interpreter against a table of file bodies and the
# interpreter name (or nil) each one is expected to produce.
class TestShebang < Test::Unit::TestCase
  include Linguist

  # Asserts that the interpreter extracted from `body` equals `interpreter`.
  #
  # interpreter - Expected String interpreter name, or nil when none is expected.
  # body        - String file contents handed to Shebang.interpreter.
  def assert_interpreter(interpreter, body)
    detected = Shebang.interpreter(body)
    assert_equal interpreter, detected
  end

  # Runs every [expected, body] pair in order; the first mismatch fails the test.
  def test_shebangs
    cases = [
      # Bodies whose first line is not a shebang.
      [nil, ""],
      [nil, "foo"],
      [nil, "#bar"],
      [nil, "#baz"],
      [nil, "///"],
      [nil, "\n\n\n\n\n"],
      [nil, " #!/usr/sbin/ruby"],
      [nil, "\n#!/usr/sbin/ruby"],

      # Absolute interpreter paths, with and without arguments.
      ["ruby", "#!/usr/sbin/ruby\n# bar"],
      ["ruby", "#!/usr/bin/ruby\n# foo"],
      ["ruby", "#!/usr/sbin/ruby"],
      ["ruby", "#!/usr/sbin/ruby foo bar baz\n"],

      # Interpreters launched through env.
      ["Rscript", "#!/usr/bin/env Rscript\n# example R script\n#\n"],
      ["crystal", "#!/usr/bin/env bin/crystal"],
      ["ruby", "#!/usr/bin/env ruby\n# baz"],

      # Shells, versioned interpreters, flags and a space after "#!".
      ["bash", "#!/usr/bin/bash\n"],
      ["sh", "#!/bin/sh"],
      ["python", "#!/bin/python\n# foo\n# bar\n# baz"],
      ["python2", "#!/usr/bin/python2.7\n\n\n\n"],
      ["python3", "#!/usr/bin/python3\n\n\n\n"],
      ["sbcl", "#!/usr/bin/sbcl --script\n\n"],
      ["perl", "#! perl"]
    ]

    cases.each do |expected, body|
      assert_interpreter expected, body
    end
  end
end
|
||||||
Reference in New Issue
Block a user