Merge pull request #2299 from github/shebang-fixing

Fixing up shebang detection to match new tokenizer behaviour
2026-06-09 22:13:16 +00:00 · 2015-04-02 14:14:29 -04:00
parent 55eaf03afb 7394e9400b
commit a6a745d99d
2 changed files with 15 additions and 8 deletions
--- a/lib/linguist/shebang.rb
+++ b/lib/linguist/shebang.rb
@@ -23,17 +23,20 @@ module Linguist
      # First line must start with #!
      return unless shebang && shebang.start_with?("#!")

-      # Get the parts of the shebang without the #!
-      tokens = shebang.sub(/^#!\s*/, '').strip.split(' ')
+      s = StringScanner.new(shebang)

      # There was nothing after the #!
-      return if tokens.empty?
+      return unless path = s.scan(/^#!\s*\S+/)

-      # Get the name of the interpreter
-      script = File.basename(tokens.first)
+      # Take the last "/"-separated component of the match as the script name
+      script = path.split('/').last

-      # Get next argument if interpreter was /usr/bin/env
-      script = tokens[1] if script == 'env'
+      # For a /usr/bin/env-style shebang, keep scanning to find the real interpreter
+      if script == 'env'
+        s.scan(/\s+/)
+        s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar
+        script = s.scan(/\S+/)
+      end

      # Interpreter was /usr/bin/env with no arguments
      return unless script
@@ -41,6 +44,9 @@ module Linguist
      # "python2.6" -> "python2"
      script.sub! /(\.\d+)$/, ''

+      # Strip any leftover "#!" prefix: "#! perl" -> "perl"
+      script.sub! /^#!\s*/, ''
+
      # Check for multiline shebang hacks that call `exec`
      if script == 'sh' &&
        data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
--- a/test/test_shebang.rb
+++ b/test/test_shebang.rb
@@ -38,6 +38,7 @@ class TestShebang < Minitest::Test
    assert_interpreter "perl", "#! perl"

    assert_interpreter "ruby", "#!/bin/sh\n\n\nexec ruby $0 $@"
-  end

+    assert_interpreter "sh", "#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"
+  end
 end