From d4c2d83af9f576aacdd97d58796d2a22fa90def4 Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 12 Dec 2017 21:53:36 +1100 Subject: [PATCH] Do not traverse symlinks in heuristics (#3946) --- lib/linguist/blob.rb | 11 ++++++++++- lib/linguist/classifier.rb | 2 ++ lib/linguist/file_blob.rb | 5 +++++ lib/linguist/heuristics.rb | 2 ++ lib/linguist/lazy_blob.rb | 5 +++++ lib/linguist/shebang.rb | 2 ++ lib/linguist/strategy/modeline.rb | 2 ++ 7 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/linguist/blob.rb b/lib/linguist/blob.rb index 4e77bc58..617e56b1 100644 --- a/lib/linguist/blob.rb +++ b/lib/linguist/blob.rb @@ -11,11 +11,13 @@ module Linguist # # path - A path String (does not necessarily exists on the file system). # content - Content of the file. + # symlink - Whether the file is a symlink. # # Returns a Blob. - def initialize(path, content) + def initialize(path, content, symlink: false) @path = path @content = content + @symlink = symlink end # Public: Filename @@ -69,5 +71,12 @@ module Linguist "." + segments[index..-1].join(".") end end + + # Public: Is this a symlink? + # + # Returns true or false. + def symlink? + @symlink + end end end diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb index 9e7b5319..8b858e45 100644 --- a/lib/linguist/classifier.rb +++ b/lib/linguist/classifier.rb @@ -18,6 +18,8 @@ module Linguist # # Returns an Array of Language objects, most probable first. def self.call(blob, possible_languages) + return [] if blob.symlink? + language_names = possible_languages.map(&:name) classify(Samples.cache, blob.data[0...CLASSIFIER_CONSIDER_BYTES], language_names).map do |name, _| Language[name] # Return the actual Language objects diff --git a/lib/linguist/file_blob.rb b/lib/linguist/file_blob.rb index 3f26c033..bebebec6 100644 --- a/lib/linguist/file_blob.rb +++ b/lib/linguist/file_blob.rb @@ -26,6 +26,11 @@ module Linguist @mode ||= File.stat(@fullpath).mode.to_s(8) end + def symlink? + return @symlink if !@symlink.nil? + @symlink = (File.symlink?(@fullpath) rescue false) + end + # Public: Read file contents. # # Returns a String. diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index f3c06254..ba758170 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -16,6 +16,8 @@ module Linguist # # Returns an Array of languages, or empty if none matched or were inconclusive. def self.call(blob, candidates) + return [] if blob.symlink? + data = blob.data[0...HEURISTICS_CONSIDER_BYTES] @heuristics.each do |heuristic| diff --git a/lib/linguist/lazy_blob.rb b/lib/linguist/lazy_blob.rb index 339c2e36..d93fb6f7 100644 --- a/lib/linguist/lazy_blob.rb +++ b/lib/linguist/lazy_blob.rb @@ -80,6 +80,11 @@ module Linguist @size end + def symlink? + # We don't create LazyBlobs for symlinks. + false + end + def cleanup! @data.clear if @data end diff --git a/lib/linguist/shebang.rb b/lib/linguist/shebang.rb index 7e5f9833..290dca70 100644 --- a/lib/linguist/shebang.rb +++ b/lib/linguist/shebang.rb @@ -11,6 +11,8 @@ module Linguist # Returns an Array with one Language if the blob has a shebang with a valid # interpreter, or empty if there is no shebang. def self.call(blob, _ = nil) + return [] if blob.symlink? + Language.find_by_interpreter interpreter(blob.data) end diff --git a/lib/linguist/strategy/modeline.rb b/lib/linguist/strategy/modeline.rb index 16474798..a0208b3f 100644 --- a/lib/linguist/strategy/modeline.rb +++ b/lib/linguist/strategy/modeline.rb @@ -109,6 +109,8 @@ module Linguist # Returns an Array with one Language if the blob has a Vim or Emacs modeline # that matches a Language name or alias. Returns an empty array if no match. def self.call(blob, _ = nil) + return [] if blob.symlink? + header = blob.first_lines(SEARCH_SCOPE).join("\n") footer = blob.last_lines(SEARCH_SCOPE).join("\n") Array(Language.find_by_alias(modeline(header + footer)))