Fixing up shebang detection to match new tokenizer behaviour

2026-01-09 10:55:32 +00:00 · 2015-04-01 20:23:16 -05:00
parent 893bd7e204
commit fa5496eef4
1 changed file with 13 additions and 7 deletions
--- a/lib/linguist/shebang.rb
+++ b/lib/linguist/shebang.rb
@@ -23,17 +23,20 @@ module Linguist
      # First line must start with #!
      return unless shebang && shebang.start_with?("#!")
-      # Get the parts of the shebang without the #!
+      s = StringScanner.new(shebang)
      tokens = shebang.sub(/^#!\s*/, '').strip.split(' ')
      # There was nothing after the #!
-      return if tokens.empty?
+      return unless path = s.scan(/^#!\s*\S+/)
-      # Get the name of the interpreter
+      # Keep going
-      script = File.basename(tokens.first)
+      script = path.split('/').last
-      # Get next argument if interpreter was /usr/bin/env
+      # if /usr/bin/env type shebang then walk the string
-      script = tokens[1] if script == 'env'
+      if script == 'env'
        s.scan(/\s+/)
        s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar
        script = s.scan(/\S+/)
      end
      # Interpreter was /usr/bin/env with no arguments
      return unless script
@@ -41,6 +44,9 @@ module Linguist
      # "python2.6" -> "python2"
      script.sub! /(\.\d+)$/, ''
      # #! perl -> perl
      script.sub! /^#!\s*/, ''
      # Check for multiline shebang hacks that call `exec`
      if script == 'sh' &&
        data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }