mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
language interpreters and shebang lines
Add an interpreter array to each language, and match interpreters found in the shebang lines of scripts to this array to identify the language of scripts. With suggestions from tnm. https://github.com/github/linguist/pull/687
This commit is contained in:
@@ -17,6 +17,7 @@ module Linguist
|
||||
@alias_index = {}
|
||||
|
||||
@extension_index = Hash.new { |h,k| h[k] = [] }
|
||||
@interpreter_index = Hash.new { |h,k| h[k] = [] }
|
||||
@filename_index = Hash.new { |h,k| h[k] = [] }
|
||||
@primary_extension_index = {}
|
||||
|
||||
@@ -71,6 +72,10 @@ module Linguist
|
||||
|
||||
@primary_extension_index[language.primary_extension] = language
|
||||
|
||||
language.interpreters.each do |interpreter|
|
||||
@interpreter_index[interpreter] << language
|
||||
end
|
||||
|
||||
language.filenames.each do |filename|
|
||||
@filename_index[filename] << language
|
||||
end
|
||||
@@ -101,6 +106,8 @@ module Linguist
|
||||
data = data.call() if data.respond_to?(:call)
|
||||
if data.nil? || data == ""
|
||||
nil
|
||||
elsif result = find_by_shebang(data)
|
||||
result.first
|
||||
elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
|
||||
Language[result[0]]
|
||||
end
|
||||
@@ -162,6 +169,20 @@ module Linguist
|
||||
langs.compact.uniq
|
||||
end
|
||||
|
||||
# Public: Look up Languages by shebang line.
|
||||
#
|
||||
# data - Array of tokens or String data to analyze.
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# Language.find_by_shebang("#!/bin/bash\ndate;")
|
||||
# # => [#<Language name="Bash">]
|
||||
#
|
||||
# Returns the matching Language
|
||||
def self.find_by_shebang(data)
|
||||
@interpreter_index[Linguist.interpreter_from_shebang(data)]
|
||||
end
|
||||
|
||||
# Public: Look up Language by its name or lexer.
|
||||
#
|
||||
# name - The String name of the Language
|
||||
@@ -247,6 +268,7 @@ module Linguist
|
||||
|
||||
# Set extensions or default to [].
|
||||
@extensions = attributes[:extensions] || []
|
||||
@interpreters = attributes[:interpreters] || []
|
||||
@filenames = attributes[:filenames] || []
|
||||
|
||||
unless @primary_extension = attributes[:primary_extension]
|
||||
@@ -359,6 +381,15 @@ module Linguist
|
||||
# Returns the extension String.
|
||||
attr_reader :primary_extension
|
||||
|
||||
# Public: Get interpreters
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# # => ['awk', 'gawk', 'mawk' ...]
|
||||
#
|
||||
# Returns the interpreters Array
|
||||
attr_reader :interpreters
|
||||
|
||||
# Public: Get filenames
|
||||
#
|
||||
# Examples
|
||||
@@ -452,11 +483,13 @@ module Linguist
|
||||
end
|
||||
|
||||
extensions = Samples::DATA['extnames']
|
||||
interpreters = Samples::DATA['interpreters']
|
||||
filenames = Samples::DATA['filenames']
|
||||
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
|
||||
|
||||
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
|
||||
options['extensions'] ||= []
|
||||
options['interpreters'] ||= []
|
||||
options['filenames'] ||= []
|
||||
|
||||
if extnames = extensions[name]
|
||||
@@ -467,6 +500,18 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
if interpreters == nil
|
||||
interpreters = {}
|
||||
end
|
||||
|
||||
if interpreter_names = interpreters[name]
|
||||
interpreter_names.each do |interpreter|
|
||||
if !options['interpreters'].include?(interpreter)
|
||||
options['interpreters'] << interpreter
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if fns = filenames[name]
|
||||
fns.each do |filename|
|
||||
if !options['filenames'].include?(filename)
|
||||
@@ -487,6 +532,7 @@ module Linguist
|
||||
:searchable => options.key?('searchable') ? options['searchable'] : true,
|
||||
:search_term => options['search_term'],
|
||||
:extensions => options['extensions'].sort,
|
||||
:interpreters => options['interpreters'].sort,
|
||||
:primary_extension => options['primary_extension'],
|
||||
:filenames => options['filenames'],
|
||||
:popular => popular.include?(name)
|
||||
|
||||
@@ -52,6 +52,7 @@ module Linguist
|
||||
yield({
|
||||
:path => File.join(dirname, filename),
|
||||
:language => category,
|
||||
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
|
||||
:extname => File.extname(filename)
|
||||
})
|
||||
end
|
||||
@@ -67,6 +68,7 @@ module Linguist
|
||||
def self.data
|
||||
db = {}
|
||||
db['extnames'] = {}
|
||||
db['interpreters'] = {}
|
||||
db['filenames'] = {}
|
||||
|
||||
each do |sample|
|
||||
@@ -80,6 +82,14 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
if sample[:interpreter]
|
||||
db['interpreters'][language_name] ||= []
|
||||
if !db['interpreters'][language_name].include?(sample[:interpreter])
|
||||
db['interpreters'][language_name] << sample[:interpreter]
|
||||
db['interpreters'][language_name].sort!
|
||||
end
|
||||
end
|
||||
|
||||
if sample[:filename]
|
||||
db['filenames'][language_name] ||= []
|
||||
db['filenames'][language_name] << sample[:filename]
|
||||
@@ -95,4 +105,32 @@ module Linguist
|
||||
db
|
||||
end
|
||||
end
|
||||
|
||||
# Used to retrieve the interpreter from the shebang line of a file's
|
||||
# data.
|
||||
def self.interpreter_from_shebang(data)
|
||||
lines = data.lines
|
||||
|
||||
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
||||
bang.sub!(/^#! /, '#!')
|
||||
tokens = bang.split(' ')
|
||||
pieces = tokens.first.split('/')
|
||||
|
||||
if pieces.size > 1
|
||||
script = pieces.last
|
||||
else
|
||||
script = pieces.first.sub('#!', '')
|
||||
end
|
||||
|
||||
script = script == 'env' ? tokens[1] : script
|
||||
|
||||
# "python2.6" -> "python"
|
||||
if script =~ /((?:\d+\.?)+)/
|
||||
script.sub! $1, ''
|
||||
end
|
||||
|
||||
script
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user