From b275b5d728fd0330f8df29513a8c935528e2bc36 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Fri, 4 Sep 2015 10:24:06 +0200 Subject: [PATCH] Soften memory pressure --- lib/linguist/generated.rb | 8 ++++++-- lib/linguist/heuristics.rb | 3 ++- lib/linguist/lazy_blob.rb | 4 ++++ lib/linguist/repository.rb | 7 +++++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/linguist/generated.rb b/lib/linguist/generated.rb index f1fb2d19..d107e737 100644 --- a/lib/linguist/generated.rb +++ b/lib/linguist/generated.rb @@ -241,22 +241,26 @@ module Linguist return lines[0].include?("Code generated by") end + PROTOBUF_EXTENSIONS = ['.py', '.java', '.h', '.cc', '.cpp'] + # Internal: Is the blob a C++, Java or Python source file generated by the # Protocol Buffer compiler? # # Returns true of false. def generated_protocol_buffer? - return false unless ['.py', '.java', '.h', '.cc', '.cpp'].include?(extname) + return false unless PROTOBUF_EXTENSIONS.include?(extname) return false unless lines.count > 1 return lines[0].include?("Generated by the protocol buffer compiler. DO NOT EDIT!") end + APACHE_THRIFT_EXTENSIONS = ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp'] + # Internal: Is the blob generated by Apache Thrift compiler? # # Returns true or false def generated_apache_thrift? - return false unless ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp'].include?(extname) + return false unless APACHE_THRIFT_EXTENSIONS.include?(extname) return false unless lines.count > 1 return lines[0].include?("Autogenerated by Thrift Compiler") || lines[1].include?("Autogenerated by Thrift Compiler") diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index 1660d99f..11f58b28 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -56,7 +56,8 @@ module Linguist # Internal: Check if this heuristic matches the candidate languages. def matches?(filename) - @extensions.any? { |ext| filename.downcase.end_with?(ext) } + filename = filename.downcase + @extensions.any? { |ext| filename.end_with?(ext) } end # Internal: Perform the heuristic diff --git a/lib/linguist/lazy_blob.rb b/lib/linguist/lazy_blob.rb index 55c10309..28fb78f3 100644 --- a/lib/linguist/lazy_blob.rb +++ b/lib/linguist/lazy_blob.rb @@ -79,6 +79,10 @@ module Linguist @size end + def cleanup! + @data.clear if @data + end + protected # Returns true if the attribute is present and not the string "false". diff --git a/lib/linguist/repository.rb b/lib/linguist/repository.rb index 181ddf0e..01e595da 100644 --- a/lib/linguist/repository.rb +++ b/lib/linguist/repository.rb @@ -157,8 +157,11 @@ module Linguist blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8)) - next unless blob.include_in_language_stats? - file_map[new] = [blob.language.group.name, blob.size] + if blob.include_in_language_stats? + file_map[new] = [blob.language.group.name, blob.size] + end + + blob.cleanup! end end