consolidate shebang logic

This commit is contained in:
Brandon Keepers
2014-11-27 12:18:23 -05:00
parent e42ccf0d82
commit fd85f7f112
5 changed files with 69 additions and 45 deletions

View File

@@ -4,4 +4,5 @@ require 'linguist/heuristics'
require 'linguist/language' require 'linguist/language'
require 'linguist/repository' require 'linguist/repository'
require 'linguist/samples' require 'linguist/samples'
require 'linguist/shebang'
require 'linguist/version' require 'linguist/version'

View File

@@ -11,7 +11,7 @@ require 'linguist/samples'
require 'linguist/file_blob' require 'linguist/file_blob'
require 'linguist/blob_helper' require 'linguist/blob_helper'
require 'linguist/strategy/filename' require 'linguist/strategy/filename'
require 'linguist/strategy/shebang' require 'linguist/shebang'
module Linguist module Linguist
# Language names that are recognizable by GitHub. Defined languages # Language names that are recognizable by GitHub. Defined languages
@@ -95,7 +95,7 @@ module Linguist
STRATEGIES = [ STRATEGIES = [
Linguist::Strategy::Filename, Linguist::Strategy::Filename,
Linguist::Strategy::Shebang, Linguist::Shebang,
Linguist::Heuristics, Linguist::Heuristics,
Linguist::Classifier Linguist::Classifier
] ]
@@ -213,6 +213,21 @@ module Linguist
@interpreter_index[Linguist.interpreter_from_shebang(data)] @interpreter_index[Linguist.interpreter_from_shebang(data)]
end end
# Public: Look up Languages by interpreter.
#
# interpreter - String of interpreter name (e.g. "bash"), used directly as
#               the key into @interpreter_index.
#
# Examples
#
# Language.find_by_interpreter("bash")
# # => [#<Language name="Bash">]
#
# Returns whatever @interpreter_index holds for the interpreter; per the
# example above this is an Array of matching Languages.
# NOTE(review): the result for an unknown or nil interpreter depends on how
# @interpreter_index is built, which is not shown here - confirm.
def self.find_by_interpreter(interpreter)
@interpreter_index[interpreter]
end
# Public: Look up Language by its name or lexer. # Public: Look up Language by its name or lexer.
# #
# name - The String name of the Language # name - The String name of the Language

View File

@@ -115,40 +115,9 @@ module Linguist
end end
end end
# Used to retrieve the interpreter from the shebang line of a file's # Used to retrieve the interpreter from the shebang line of a file's data.
# data.
def self.interpreter_from_shebang(data) def self.interpreter_from_shebang(data)
lines = data.lines.to_a Shebang.new(data).interpreter
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
bang.sub!(/^#! /, '#!')
tokens = bang.split(' ')
pieces = tokens.first.split('/')
if pieces.size > 1
script = pieces.last
else
script = pieces.first.sub('#!', '')
end
script = script == 'env' ? tokens[1] : script
# If script has an invalid shebang, we might get here
return unless script
# "python2.6" -> "python2"
script.sub! $1, '' if script =~ /(\.\d+)$/
# Check for multiline shebang hacks that call `exec`
if script == 'sh' &&
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
script = $1
end
File.basename(script)
else
nil
end
end end
end end

49
lib/linguist/shebang.rb Normal file
View File

@@ -0,0 +1,49 @@
module Linguist
  # Check if there's a shebang line and use that as authoritative
  class Shebang
    # Public: Look up Languages using the interpreter named in the blob's
    # shebang line.
    #
    # blob - Object responding to `data` with the file contents as a String.
    # _    - Ignored.
    #
    # Returns the result of Language.find_by_interpreter.
    def self.call(blob, _)
      Language.find_by_interpreter(new(blob.data).interpreter)
    end

    # The raw file contents this instance inspects.
    attr_reader :data

    # data - String file contents whose first line may be a shebang.
    def initialize(data)
      @data = data
    end

    # Public: Extract the interpreter name from the shebang line.
    #
    # Examples
    #
    #   Shebang.new("#!/usr/bin/env python\n").interpreter  # => "python"
    #   Shebang.new("#!/usr/bin/python2.6\n").interpreter   # => "python2"
    #   Shebang.new("puts 1\n").interpreter                 # => nil
    #
    # Returns the interpreter name as a String, or nil when the first line is
    # not a shebang or `env` is given no program to run.
    def interpreter
      lines = data.lines.to_a
      first_line = lines.first
      return unless first_line && first_line.start_with?('#!')

      # Tolerate any run of blanks between "#!" and the path ("#!   /bin/sh"),
      # not just a single space; otherwise the path lands in a separate token
      # and the interpreter comes out empty.
      bang = first_line.sub(/\A#![ \t]+/, '#!')
      tokens = bang.split(' ')
      pieces = tokens.first.split('/')
      script = pieces.size > 1 ? pieces.last : pieces.first.sub('#!', '')

      # "#!/usr/bin/env ruby" names the real interpreter in the next token.
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang, we might get here
      return unless script

      # "python2.6" -> "python2". Anchor the removal at the end of the name so
      # an earlier occurrence of the same text is left alone
      # ("perl5.10.1" -> "perl5.10", not "perl50.1").
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        lines.first(5).each do |line|
          exec_match = line.match(/exec (\w+).+\$0.+\$@/)
          next unless exec_match

          script = exec_match[1]
          break
        end
      end

      File.basename(script)
    end
  end
end

View File

@@ -1,10 +0,0 @@
module Linguist
  module Strategy
    # Detection strategy that defers entirely to the blob's shebang line.
    class Shebang
      class << self
        # Public: Ask Language which languages match the blob's shebang.
        #
        # blob - Object responding to `data` with the file contents.
        # _    - Ignored.
        #
        # Returns the result of Language.find_by_shebang.
        def call(blob, _)
          contents = blob.data
          Language.find_by_shebang(contents)
        end
      end
    end
  end
end