From fd85f7f112bd231d338fbcb6cd834a3f889da370 Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Thu, 27 Nov 2014 12:18:23 -0500 Subject: [PATCH] consolidate shebang logic --- lib/linguist.rb | 1 + lib/linguist/language.rb | 19 +++++++++++-- lib/linguist/samples.rb | 35 ++--------------------- lib/linguist/shebang.rb | 49 ++++++++++++++++++++++++++++++++ lib/linguist/strategy/shebang.rb | 10 ------- 5 files changed, 69 insertions(+), 45 deletions(-) create mode 100644 lib/linguist/shebang.rb delete mode 100644 lib/linguist/strategy/shebang.rb diff --git a/lib/linguist.rb b/lib/linguist.rb index 3714b5a0..ff9fc3a2 100644 --- a/lib/linguist.rb +++ b/lib/linguist.rb @@ -4,4 +4,5 @@ require 'linguist/heuristics' require 'linguist/language' require 'linguist/repository' require 'linguist/samples' +require 'linguist/shebang' require 'linguist/version' diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 07972019..7c51ae9d 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -11,7 +11,7 @@ require 'linguist/samples' require 'linguist/file_blob' require 'linguist/blob_helper' require 'linguist/strategy/filename' -require 'linguist/strategy/shebang' +require 'linguist/shebang' module Linguist # Language names that are recognizable by GitHub. Defined languages @@ -95,7 +95,7 @@ module Linguist STRATEGIES = [ Linguist::Strategy::Filename, - Linguist::Strategy::Shebang, + Linguist::Shebang, Linguist::Heuristics, Linguist::Classifier ] @@ -213,6 +213,21 @@ module Linguist @interpreter_index[Linguist.interpreter_from_shebang(data)] end + # Public: Look up Languages by interpreter. + # + # interpreter - String of interpreter name + # + # Examples + # + # Language.find_by_interpreter("bash") + # # => [#] + # + # Returns the matching Language + def self.find_by_interpreter(interpreter) + @interpreter_index[interpreter] + end + + # Public: Look up Language by its name or lexer. # # name - The String name of the Language diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb index 001204b5..1cacdf09 100644 --- a/lib/linguist/samples.rb +++ b/lib/linguist/samples.rb @@ -115,40 +115,9 @@ module Linguist end end - # Used to retrieve the interpreter from the shebang line of a file's - # data. + # Used to retrieve the interpreter from the shebang line of a file's data. def self.interpreter_from_shebang(data) - lines = data.lines.to_a - - if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/ - bang.sub!(/^#! /, '#!') - tokens = bang.split(' ') - pieces = tokens.first.split('/') - - if pieces.size > 1 - script = pieces.last - else - script = pieces.first.sub('#!', '') - end - - script = script == 'env' ? tokens[1] : script - - # If script has an invalid shebang, we might get here - return unless script - - # "python2.6" -> "python2" - script.sub! $1, '' if script =~ /(\.\d+)$/ - - # Check for multiline shebang hacks that call `exec` - if script == 'sh' && - lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } - script = $1 - end - - File.basename(script) - else - nil - end + Shebang.new(data).interpreter end end diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb new file mode 100644 index 00000000..52933e2b --- /dev/null +++ b/lib/linguist/shebang.rb @@ -0,0 +1,49 @@ +module Linguist + # Check if there's a shebang line and use that as authoritative + class Shebang + def self.call(blob, _) + Language.find_by_interpreter(new(blob.data).interpreter) + end + + attr_reader :data + + def initialize(data) + @data = data + end + + def interpreter + lines = data.lines.to_a + + if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/ + bang.sub!(/^#! /, '#!') + tokens = bang.split(' ') + pieces = tokens.first.split('/') + + if pieces.size > 1 + script = pieces.last + else + script = pieces.first.sub('#!', '') + end + + script = script == 'env' ? tokens[1] : script + + # If script has an invalid shebang, we might get here + return unless script + + # "python2.6" -> "python2" + script.sub! $1, '' if script =~ /(\.\d+)$/ + + # Check for multiline shebang hacks that call `exec` + if script == 'sh' && + lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } + script = $1 + end + + File.basename(script) + else + nil + end + + end + end +end diff --git a/lib/linguist/strategy/shebang.rb b/lib/linguist/strategy/shebang.rb deleted file mode 100644 index dd5bc38b..00000000 --- a/lib/linguist/strategy/shebang.rb +++ /dev/null @@ -1,10 +0,0 @@ -module Linguist - module Strategy - # Check if there's a shebang line and use that as authoritative - class Shebang - def self.call(blob, _) - Language.find_by_shebang(blob.data) - end - end - end -end