Merge pull request #1787 from github/move-shebang

Move shebang (updated)
This commit is contained in:
Brandon Keepers
2014-11-28 18:02:04 -06:00
8 changed files with 112 additions and 87 deletions

View File

@@ -4,4 +4,5 @@ require 'linguist/heuristics'
require 'linguist/language' require 'linguist/language'
require 'linguist/repository' require 'linguist/repository'
require 'linguist/samples' require 'linguist/samples'
require 'linguist/shebang'
require 'linguist/version' require 'linguist/version'

View File

@@ -11,7 +11,7 @@ require 'linguist/samples'
require 'linguist/file_blob' require 'linguist/file_blob'
require 'linguist/blob_helper' require 'linguist/blob_helper'
require 'linguist/strategy/filename' require 'linguist/strategy/filename'
require 'linguist/strategy/shebang' require 'linguist/shebang'
module Linguist module Linguist
# Language names that are recognizable by GitHub. Defined languages # Language names that are recognizable by GitHub. Defined languages
@@ -95,7 +95,7 @@ module Linguist
STRATEGIES = [ STRATEGIES = [
Linguist::Strategy::Filename, Linguist::Strategy::Filename,
Linguist::Strategy::Shebang, Linguist::Shebang,
Linguist::Heuristics, Linguist::Heuristics,
Linguist::Classifier Linguist::Classifier
] ]
@@ -199,20 +199,26 @@ module Linguist
@extension_index[extname] @extension_index[extname]
end end
# Public: Look up Languages by shebang line. # DEPRECATED
def self.find_by_shebang(data)
  # Kept for callers that still pass raw file data: resolve the interpreter
  # named by the shebang first, then consult the interpreter index with it.
  interpreter = Shebang.interpreter(data)
  @interpreter_index[interpreter]
end
# Public: Look up Languages by interpreter.
# #
# data - Array of tokens or String data to analyze. # interpreter - String of interpreter name
# #
# Examples # Examples
# #
# Language.find_by_shebang("#!/bin/bash\ndate;") # Language.find_by_interpreter("bash")
# # => [#<Language name="Bash">] # # => [#<Language name="Bash">]
# #
# Returns the matching Language # Returns the matching Language
def self.find_by_shebang(data) def self.find_by_interpreter(interpreter)
@interpreter_index[Linguist.interpreter_from_shebang(data)] @interpreter_index[interpreter]
end end
# Public: Look up Language by its name or lexer. # Public: Look up Language by its name or lexer.
# #
# name - The String name of the Language # name - The String name of the Language

View File

@@ -6,6 +6,7 @@ end
require 'linguist/md5' require 'linguist/md5'
require 'linguist/classifier' require 'linguist/classifier'
require 'linguist/shebang'
module Linguist module Linguist
# Model for accessing classifier training data. # Model for accessing classifier training data.
@@ -61,7 +62,7 @@ module Linguist
yield({ yield({
:path => path, :path => path,
:language => category, :language => category,
:interpreter => Linguist.interpreter_from_shebang(File.read(path)), :interpreter => Shebang.interpreter(File.read(path)),
:extname => File.extname(filename) :extname => File.extname(filename)
}) })
end end
@@ -114,41 +115,4 @@ module Linguist
db db
end end
end end
# Extract the interpreter name from the shebang on the first line of a
# file's data.
#
# data - String contents of the file.
#
# Returns the basename of the interpreter as a String, or nil when the first
# line is not a shebang or `env` is given without an argument.
def self.interpreter_from_shebang(data)
  lines = data.lines.to_a
  return nil if lines.empty?

  first = lines[0].match(/(.+)\n?/)
  return nil unless first

  bang = first[0]
  return nil unless bang =~ /^#!/

  # Tolerate a single space between "#!" and the interpreter path.
  bang.sub!(/^#! /, '#!')
  tokens = bang.split(' ')

  head = tokens.first.split('/')
  script = head.size > 1 ? head.last : head.first.sub('#!', '')

  # `env` only launches the real interpreter, which is its first argument.
  script = tokens[1] if script == 'env'

  # If script has an invalid shebang, we might get here
  return unless script

  # "python2.6" -> "python2"
  if (version = script.match(/(\.\d+)$/))
    script.sub!(version[1], '')
  end

  # Check for multiline shebang hacks that call `exec`
  if script == 'sh'
    lines[0...5].each do |line|
      next unless (hack = line.match(/exec (\w+).+\$0.+\$@/))
      script = hack[1]
      break
    end
  end

  File.basename(script)
end
end end

44
lib/linguist/shebang.rb Normal file
View File

@@ -0,0 +1,44 @@
module Linguist
  class Shebang
    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns an Array with one Language if the blob has a shebang with a valid
    # interpreter, or empty if there is no shebang.
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - String contents of a file; only its first line is treated as the
    #        shebang (the first five lines are scanned for the `exec` hack).
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env ruby\n# baz")
    #   # => "ruby"
    #
    #   Shebang.interpreter("#!/usr/bin/python2.7\n")
    #   # => "python2"
    #
    #   Shebang.interpreter("foo")
    #   # => nil
    #
    # Returns a String or nil
    def self.interpreter(data)
      lines = data.lines
      return unless (match = /^#! ?(.*)$/.match(lines.first))

      tokens = match[1].split(' ')
      # A bare "#!" (or "#! ") names no interpreter; without this guard
      # tokens.first is nil and the split below would raise.
      return if tokens.empty?

      script = tokens.first.split('/').last
      # `env` only launches the real interpreter, which is its first argument.
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang, we might get here
      return unless script

      # "python2.6" -> "python2". The removal is anchored at the end so that
      # only the trailing component is dropped ("perl5.10.1" -> "perl5.10");
      # substituting the captured text would hit its first occurrence instead.
      script = script.sub(/\.\d+$/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        lines.first(5).each do |line|
          next unless (hack = line.match(/exec (\w+).+\$0.+\$@/))
          script = hack[1]
          break
        end
      end

      File.basename(script)
    end
  end
end

View File

@@ -1,10 +0,0 @@
module Linguist
  module Strategy
    # Strategy that treats a blob's shebang line as authoritative.
    class Shebang
      class << self
        # blob - An object responding to `data`.
        # _    - Ignored second argument.
        #
        # Returns the result of Language.find_by_shebang for the blob's data.
        def call(blob, _)
          data = blob.data
          Language.find_by_shebang(data)
        end
      end
    end
  end
end

View File

@@ -223,34 +223,21 @@ class TestLanguage < Test::Unit::TestCase
assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl') assert_equal [Language['Chapel']], Language.find_by_filename('examples/hello.chpl')
end end
def test_find_by_shebang def test_find_by_interpreter
assert_equal 'ruby', Linguist.interpreter_from_shebang("#!/usr/bin/ruby\n# baz") {
{ [] => ["", "ruby" => "Ruby",
"foo", "Rscript" => "R",
"#bar", "sh" => "Shell",
"#baz", "bash" => "Shell",
"///", "python" => "Python",
"\n\n\n\n\n", "python2" => "Python",
" #!/usr/sbin/ruby", "python3" => "Python",
"\n#!/usr/sbin/ruby"], "sbcl" => "Common Lisp"
['Ruby'] => ["#!/usr/bin/env ruby\n# baz", }.each do |interpreter, language|
"#!/usr/sbin/ruby\n# bar", assert_equal [Language[language]], Language.find_by_interpreter(interpreter)
"#!/usr/bin/ruby\n# foo",
"#!/usr/sbin/ruby",
"#!/usr/sbin/ruby foo bar baz\n"],
['R'] => ["#!/usr/bin/env Rscript\n# example R script\n#\n"],
['Shell'] => ["#!/usr/bin/bash\n", "#!/bin/sh"],
['Python'] => ["#!/bin/python\n# foo\n# bar\n# baz",
"#!/usr/bin/python2.7\n\n\n\n",
"#!/usr/bin/python3\n\n\n\n"],
["Common Lisp"] => ["#!/usr/bin/sbcl --script\n\n"]
}.each do |languages, bodies|
bodies.each do |body|
assert_equal([body, languages.map{|l| Language[l]}],
[body, Language.find_by_shebang(body)])
end
end end
assert_equal [], Language.find_by_interpreter(nil)
end end
def test_find def test_find

View File

@@ -82,9 +82,4 @@ class TestSamples < Test::Unit::TestCase
end end
end end
end end
# Checks the interpreter extracted from an `env` shebang with a relative
# path and from a shebang carrying a minor version suffix.
def test_shebang
  {
    "#!/usr/bin/env bin/crystal" => "crystal",
    "#!/usr/bin/python2.4" => "python2"
  }.each do |body, interpreter|
    assert_equal interpreter, Linguist.interpreter_from_shebang(body)
  end
end
end end

38
test/test_shebang.rb Normal file
View File

@@ -0,0 +1,38 @@
require_relative "./helper"
# Exercises Shebang.interpreter against file bodies with and without a
# usable shebang on the first line.
class TestShebang < Test::Unit::TestCase
  include Linguist

  # Asserts that `body` yields `interpreter` (a String, or nil for none).
  def assert_interpreter(interpreter, body)
    actual = Shebang.interpreter(body)
    assert_equal interpreter, actual
  end

  def test_shebangs
    # Each row is [expected interpreter, file body]; rows run in order.
    [
      # Bodies whose first line is not a shebang.
      [nil, ""],
      [nil, "foo"],
      [nil, "#bar"],
      [nil, "#baz"],
      [nil, "///"],
      [nil, "\n\n\n\n\n"],
      [nil, " #!/usr/sbin/ruby"],
      [nil, "\n#!/usr/sbin/ruby"],

      # Bodies that start with a shebang.
      ["ruby", "#!/usr/sbin/ruby\n# bar"],
      ["ruby", "#!/usr/bin/ruby\n# foo"],
      ["ruby", "#!/usr/sbin/ruby"],
      ["ruby", "#!/usr/sbin/ruby foo bar baz\n"],
      ["Rscript", "#!/usr/bin/env Rscript\n# example R script\n#\n"],
      ["crystal", "#!/usr/bin/env bin/crystal"],
      ["ruby", "#!/usr/bin/env ruby\n# baz"],
      ["bash", "#!/usr/bin/bash\n"],
      ["sh", "#!/bin/sh"],
      ["python", "#!/bin/python\n# foo\n# bar\n# baz"],
      ["python2", "#!/usr/bin/python2.7\n\n\n\n"],
      ["python3", "#!/usr/bin/python3\n\n\n\n"],
      ["sbcl", "#!/usr/bin/sbcl --script\n\n"],
      ["perl", "#! perl"]
    ].each do |interpreter, body|
      assert_interpreter interpreter, body
    end
  end
end