From fa5496eef49a34f8ff86e5ce27b49eb7ce395c7c Mon Sep 17 00:00:00 2001 From: Arfon Smith Date: Wed, 1 Apr 2015 20:23:16 -0500 Subject: [PATCH 1/2] Fixing up shebang detection to match new tokenizer behaviour --- lib/linguist/shebang.rb | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb index 6f04e866..5b27532a 100644 --- a/lib/linguist/shebang.rb +++ b/lib/linguist/shebang.rb @@ -23,17 +23,20 @@ module Linguist # First line must start with #! return unless shebang && shebang.start_with?("#!") - # Get the parts of the shebang without the #! - tokens = shebang.sub(/^#!\s*/, '').strip.split(' ') + s = StringScanner.new(shebang) # There was nothing after the #! - return if tokens.empty? + return unless path = s.scan(/^#!\s*\S+/) - # Get the name of the interpreter - script = File.basename(tokens.first) + # Keep going + script = path.split('/').last - # Get next argument if interpreter was /usr/bin/env - script = tokens[1] if script == 'env' + # if /usr/bin/env type shebang then walk the string + if script == 'env' + s.scan(/\s+/) + s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar + script = s.scan(/\S+/) + end # Interpreter was /usr/bin/env with no arguments return unless script @@ -41,6 +44,9 @@ module Linguist # "python2.6" -> "python2" script.sub! /(\.\d+)$/, '' + # #! perl -> perl + script.sub! /^#!\s*/, '' + # Check for multiline shebang hacks that call `exec` if script == 'sh' && data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } From 7394e9400bc82dd17f706fb375016052865f89a7 Mon Sep 17 00:00:00 2001 From: Arfon Smith Date: Thu, 2 Apr 2015 14:00:03 -0400 Subject: [PATCH 2/2] Adding explicit test for new shebang parsing --- test/test_shebang.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_shebang.rb b/test/test_shebang.rb index a2d25117..de3344ec 100644 --- a/test/test_shebang.rb +++ b/test/test_shebang.rb @@ -38,6 +38,7 @@ class TestShebang < Minitest::Test assert_interpreter "perl", "#! perl" assert_interpreter "ruby", "#!/bin/sh\n\n\nexec ruby $0 $@" - end + assert_interpreter "sh", "#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh" + end end