language interpreters and shebang lines

Add an interpreter array to each language, and match interpreters found
in the shebang lines of scripts to this array to identify the language
of scripts.

With suggestions from tnm. https://github.com/github/linguist/pull/687
This commit is contained in:
Eric Schulte
2013-09-15 09:55:59 -06:00
parent eb5f1468d2
commit 7a6202a8c3
2 changed files with 84 additions and 0 deletions

View File

@@ -52,6 +52,7 @@ module Linguist
yield({
:path => File.join(dirname, filename),
:language => category,
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
:extname => File.extname(filename)
})
end
@@ -67,6 +68,7 @@ module Linguist
def self.data
db = {}
db['extnames'] = {}
db['interpreters'] = {}
db['filenames'] = {}
each do |sample|
@@ -80,6 +82,14 @@ module Linguist
end
end
if sample[:interpreter]
db['interpreters'][language_name] ||= []
if !db['interpreters'][language_name].include?(sample[:interpreter])
db['interpreters'][language_name] << sample[:interpreter]
db['interpreters'][language_name].sort!
end
end
if sample[:filename]
db['filenames'][language_name] ||= []
db['filenames'][language_name] << sample[:filename]
@@ -95,4 +105,32 @@ module Linguist
db
end
end
# Used to retrieve the interpreter from the shebang line of a file's
# data.
def self.interpreter_from_shebang(data)
lines = data.lines
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
bang.sub!(/^#! /, '#!')
tokens = bang.split(' ')
pieces = tokens.first.split('/')
if pieces.size > 1
script = pieces.last
else
script = pieces.first.sub('#!', '')
end
script = script == 'env' ? tokens[1] : script
# "python2.6" -> "python"
if script =~ /((?:\d+\.?)+)/
script.sub! $1, ''
end
script
end
end
end