From bf0e488c06bd61ceded37ae04babbdc5f8aa600d Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Wed, 10 Dec 2014 10:58:14 -0500 Subject: [PATCH 1/5] Test case for case causing error --- test/test_shebang.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_shebang.rb b/test/test_shebang.rb index 4b6b39a6..071a436c 100644 --- a/test/test_shebang.rb +++ b/test/test_shebang.rb @@ -17,6 +17,7 @@ class TestShebang < Test::Unit::TestCase assert_interpreter nil, " #!/usr/sbin/ruby" assert_interpreter nil, "\n#!/usr/sbin/ruby" assert_interpreter nil, "#!" + assert_interpreter nil, "#! " assert_interpreter "ruby", "#!/usr/sbin/ruby\n# bar" assert_interpreter "ruby", "#!/usr/bin/ruby\n# foo" From afac6a918d84a8c8e14165bf6ea0ca1edd7a482c Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Wed, 10 Dec 2014 11:00:32 -0500 Subject: [PATCH 2/5] Handle empty shebang with whitespace --- lib/linguist/shebang.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb index 3405bb62..07694667 100644 --- a/lib/linguist/shebang.rb +++ b/lib/linguist/shebang.rb @@ -19,11 +19,12 @@ module Linguist # Returns a String or nil def self.interpreter(data) lines = data.lines - return unless match = /^#! ?(.+)$/.match(lines.first) - tokens = match[1].split(' ') - script = tokens.first.split('/').last + return unless match = /^#!(.+)$/.match(lines.first) + tokens = match[1].strip.split(' ') + return if tokens.empty? + script = File.basename(tokens.first) script = tokens[1] if script == 'env' # If script has an invalid shebang, we might get here From 1490425ecbbf02cb5daf3d4f281511635f4f22c0 Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Wed, 10 Dec 2014 11:00:41 -0500 Subject: [PATCH 3/5] document shebang code --- lib/linguist/shebang.rb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb index 07694667..4e47ad6b 100644 --- a/lib/linguist/shebang.rb +++ b/lib/linguist/shebang.rb @@ -20,18 +20,25 @@ module Linguist def self.interpreter(data) lines = data.lines + # First line must start with #! return unless match = /^#!(.+)$/.match(lines.first) tokens = match[1].strip.split(' ') + + # There was nothing after the #! return if tokens.empty? + + # Get the name of the interpreter script = File.basename(tokens.first) + + # Get next argument if interpreter was /usr/bin/env script = tokens[1] if script == 'env' - # If script has an invalid shebang, we might get here + # Interpreter was /usr/bin/env with no arguments return unless script # "python2.6" -> "python2" - script.sub! $1, '' if script =~ /(\.\d+)$/ + script.sub! /(\.\d+)$/, '' # Check for multiline shebang hacks that call `exec` if script == 'sh' && From 6efee511011b5a592a5221927e7050aa345e5dd7 Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Wed, 10 Dec 2014 11:00:46 -0500 Subject: [PATCH 4/5] Add missing test cases --- test/test_shebang.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_shebang.rb b/test/test_shebang.rb index 071a436c..5ead299d 100644 --- a/test/test_shebang.rb +++ b/test/test_shebang.rb @@ -18,6 +18,7 @@ class TestShebang < Test::Unit::TestCase assert_interpreter nil, "\n#!/usr/sbin/ruby" assert_interpreter nil, "#!" assert_interpreter nil, "#! " + assert_interpreter nil, "#!/usr/bin/env" assert_interpreter "ruby", "#!/usr/sbin/ruby\n# bar" assert_interpreter "ruby", "#!/usr/bin/ruby\n# foo" @@ -35,6 +36,8 @@ class TestShebang < Test::Unit::TestCase assert_interpreter "python3", "#!/usr/bin/python3\n\n\n\n" assert_interpreter "sbcl", "#!/usr/bin/sbcl --script\n\n" assert_interpreter "perl", "#! perl" + + assert_interpreter "ruby", "#!/bin/sh\n\n\nexec ruby $0 $@" end end From 21b8e16afc15e84dce7161822aba173eba52750c Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Wed, 10 Dec 2014 12:05:37 -0500 Subject: [PATCH 5/5] Use #start_with? for clarity --- lib/linguist/shebang.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb index 4e47ad6b..6f04e866 100644 --- a/lib/linguist/shebang.rb +++ b/lib/linguist/shebang.rb @@ -18,12 +18,13 @@ module Linguist # # Returns a String or nil def self.interpreter(data) - lines = data.lines + shebang = data.lines.first # First line must start with #! - return unless match = /^#!(.+)$/.match(lines.first) + return unless shebang && shebang.start_with?("#!") - tokens = match[1].strip.split(' ') + # Get the parts of the shebang without the #! + tokens = shebang.sub(/^#!\s*/, '').strip.split(' ') # There was nothing after the #! return if tokens.empty? @@ -42,7 +43,7 @@ module Linguist # Check for multiline shebang hacks that call `exec` if script == 'sh' && - lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } + data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } script = $1 end