From 7e178cc41634e257df2fa31df9b80c94b1ae08a1 Mon Sep 17 00:00:00 2001
From: Ted Nyman <ted@ted.io>
Date: Fri, 6 Dec 2013 20:39:02 -0800
Subject: [PATCH] Add guards and a check for multiline shebang hacks

---
 lib/linguist/blob_helper.rb |  4 ++--
 lib/linguist/language.rb    |  6 +++---
 lib/linguist/samples.json   |  5 ++++-
 lib/linguist/samples.rb     | 10 +++++++++-
 samples/Racket/rkt.script!  |  7 -------
 5 files changed, 18 insertions(+), 14 deletions(-)
 delete mode 100755 samples/Racket/rkt.script!

diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb
index 81956e47..37793a36 100644
--- a/lib/linguist/blob_helper.rb
+++ b/lib/linguist/blob_helper.rb
@@ -190,9 +190,9 @@ module Linguist
     # Public: Is the blob safe to colorize?
     #
     # We use Pygments for syntax highlighting blobs. Pygments
-    # can be too slow for very large blobs or for certain 
+    # can be too slow for very large blobs or for certain
     # corner-case blobs.
-    # 
+    #
     # Return true or false
     def safe_to_colorize?
       !large? && text? && !high_ratio_of_long_lines?
diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb
index 49bbbbde..1cdf8c7b 100644
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -110,10 +110,10 @@ module Linguist
         data = data.call() if data.respond_to?(:call)
         if data.nil? || data == ""
           nil
-        elsif result = find_by_shebang(data)
+        elsif (result = find_by_shebang(data)) && !result.empty?
           result.first
-        elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
-          Language[result[0]]
+        elsif classified = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
+          Language[classified[0]]
         end
       else
         possible_languages.first
diff --git a/lib/linguist/samples.json b/lib/linguist/samples.json
index 1c37ea3b..49c1e89c 100644
--- a/lib/linguist/samples.json
+++ b/lib/linguist/samples.json
@@ -421,6 +421,9 @@
     "Xtend": [
       ".xtend"
     ]
+  },
+  "interpreters": {
+
   },
   "filenames": {
     "ApacheConf": [
@@ -43881,5 +43884,5 @@
     "Xtend": 2,
     "YAML": 1
   },
-  "md5": "647da23cd1eb02653f50ff9bfbb6e70d"
+  "md5": "9ef710bbe7098e21726a69720f0922b5"
 }
\ No newline at end of file
diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb
index a2e5ec21..2bd5212e 100644
--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -114,7 +114,7 @@ module Linguist
   # Used to retrieve the interpreter from the shebang line of a file's
   # data.
   def self.interpreter_from_shebang(data)
-    lines = data.lines
+    lines = data.lines.to_a
 
     if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
       bang.sub!(/^#! /, '#!')
@@ -134,7 +134,15 @@ module Linguist
         script.sub! $1, ''
       end
 
+      # Check for multiline shebang hacks where `sh` execs the real interpreter, e.g. `exec racket -um "$0" "$@"`
+      if script == 'sh' &&
+        lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
+        script = $1
+      end
+
       script
+    else
+      nil
     end
   end
 
diff --git a/samples/Racket/rkt.script! b/samples/Racket/rkt.script!
deleted file mode 100755
index bc5a8ca4..00000000
--- a/samples/Racket/rkt.script!
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/sh
-#| -*- scheme -*-
-exec racket -um "$0" "$@"
-|#
-
-(require racket/file racket/path racket/list racket/string
-         (for-syntax racket/base))