Merge pull request #2630 from github/vmg/memory

4.6.0: Reduce memory pressure
This commit is contained in:
Vicent Marti
2015-09-15 11:19:45 +02:00
7 changed files with 160 additions and 7 deletions

141
bin/git-linguist Executable file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env ruby
require 'linguist'
require 'rugged'
require 'optparse'
require 'json'
require 'tmpdir'
require 'zlib'
# Computes Linguist language statistics for one commit of a bare Git
# repository and persists them in a cache file stored inside the repository
# directory, so that later runs can start from the previously saved results.
class GitLinguist
  # An all-zero OID. When the cache is stored under this OID the language
  # stats are considered frozen and are returned as-is without recomputation.
  NULL_OID = ("0" * 40).freeze

  # File name of the cache, relative to the repository path.
  LANGUAGE_STATS_CACHE = 'language-stats.cache'

  # Version tag stored with the cache; a cache written under a different tag
  # (including a different Linguist version) is ignored on load.
  LANGUAGE_STATS_CACHE_VERSION = "v3:#{Linguist::VERSION}"

  attr_reader :repo_path
  attr_reader :commit_oid
  attr_reader :incremental

  # path        - String path of the Git repository, opened as a bare repository.
  # commit_oid  - String OID of the commit to analyze; defaults to the target
  #               of HEAD when nil.
  # incremental - Boolean; when true, previously cached stats are loaded and
  #               handed to the Linguist repository before computing.
  def initialize(path, commit_oid, incremental = true)
    @repo_path = path
    @commit_oid = commit_oid || rugged.head.target_id
    @incremental = incremental
  end

  # Yields a Linguist::Repository for the configured commit, then saves the
  # repository's cache under that commit.
  #
  # When the stored cache carries NULL_OID the cached stats are returned
  # immediately: the block is NOT called and nothing is written.
  #
  # Returns the block's result, or the frozen cached stats.
  def linguist
    repo = Linguist::Repository.new(rugged, commit_oid)

    cached = incremental ? load_language_stats : nil
    if cached
      old_commit_oid, old_stats = cached

      # A cache with NULL oid means that we want to freeze
      # these language stats in place and stop computing
      # them (for performance reasons)
      return old_stats if old_commit_oid == NULL_OID

      repo.load_existing_stats(old_commit_oid, old_stats)
    end

    result = yield repo
    save_language_stats(commit_oid, repo.cache)
    result
  end

  # Reads the cache file.
  #
  # Returns [commit_oid, stats] when a cache written with the current
  # LANGUAGE_STATS_CACHE_VERSION is present and complete, nil otherwise.
  def load_language_stats
    version, cached_oid, stats = load_cache
    return unless version == LANGUAGE_STATS_CACHE_VERSION && cached_oid && stats

    [cached_oid, stats]
  end

  # Writes stats to the cache file, tagged with the current cache version
  # and the given commit OID.
  def save_language_stats(commit_oid, stats)
    write_cache([LANGUAGE_STATS_CACHE_VERSION, commit_oid, stats])
  end

  # Deletes the cache file. Does nothing if there is no cache file.
  def clear_language_stats
    File.unlink(cache_file)
  rescue Errno::ENOENT
    nil
  end

  # Stores an empty cache under NULL_OID, which makes #linguist return the
  # (empty) cached stats instead of computing new ones.
  def disable_language_stats
    save_language_stats(NULL_OID, {})
  end

  protected

  # Lazily opened Rugged handle for the repository at repo_path.
  def rugged
    @rugged ||= Rugged::Repository.bare(repo_path)
  end

  # Full path of the cache file inside the repository directory.
  def cache_file
    File.join(repo_path, LANGUAGE_STATS_CACHE)
  end

  # Serializes object (Marshal + zlib deflate) into a temporary file next to
  # the cache file and renames it over the cache file, so a reader never
  # sees a partially written cache. The temporary file is removed if any
  # step fails.
  def write_cache(object)
    # Dir::Tmpname.make_tmpname no longer exists in current Rubies;
    # Dir::Tmpname.create yields a candidate path and returns it. The name
    # is generated inside repo_path so the rename below does not cross
    # filesystems.
    tmp_path = Dir::Tmpname.create(LANGUAGE_STATS_CACHE, repo_path) { |_candidate| }

    File.open(tmp_path, "wb") do |f|
      f.write(Zlib::Deflate.deflate(Marshal.dump(object)))
    end

    File.rename(tmp_path, cache_file)
    tmp_path = nil
  ensure
    if tmp_path
      begin
        File.unlink(tmp_path)
      rescue SystemCallError
        # Best-effort cleanup: the temporary file may never have been created.
        nil
      end
    end
  end

  # Reads and deserializes the cache file.
  #
  # Returns the stored object, or nil when the file is missing, unreadable,
  # or does not contain valid compressed Marshal data.
  def load_cache
    inflated = File.open(cache_file, "rb") { |f| Zlib::Inflate.inflate(f.read) }
    # NOTE(review): Marshal.load must only ever see trusted data; this relies
    # on the cache file being writable only by trusted users of the repository.
    Marshal.load(inflated)
  rescue SystemCallError, ::Zlib::DataError, ::Zlib::BufError, TypeError, ArgumentError
    # ArgumentError is what Marshal.load raises for truncated/malformed data.
    nil
  end
end
# Command-line driver: parses the options in args, locates the repository
# and runs the sub-command given as the last remaining argument.
#
# args - Array of command-line argument Strings; recognized options are
#        removed from it during parsing.
#
# Prints the usage text to stderr and exits with status 1 when the
# sub-command is missing or unknown.
def git_linguist(args)
  force_rescan = false
  target_commit = nil
  repo_dir = nil

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: git-linguist [OPTIONS] stats|breakdown|dump-cache|clear|disable"

    opts.on("-f", "--force", "Force a full rescan") { force_rescan = true }
    opts.on("--git-dir=DIR", "Path to the git repository") { |dir| repo_dir = dir }
    opts.on("--commit=COMMIT", "Commit to index") { |oid| target_commit = oid }
  end
  parser.parse!(args)

  # Without --git-dir, prefer ./.git when it is a directory and fall back
  # to the current directory itself otherwise.
  unless repo_dir
    cwd = Dir.pwd
    nested = File.join(cwd, ".git")
    if File.directory?(nested)
      repo_dir = nested
    else
      repo_dir = cwd
    end
  end

  wrapper = GitLinguist.new(repo_dir, target_commit, !force_rescan)

  command = args.pop
  case command
  when "stats"
    wrapper.linguist { |repo| puts JSON.dump(repo.languages) }
  when "breakdown"
    wrapper.linguist { |repo| puts JSON.dump(repo.breakdown_by_file) }
  when "dump-cache"
    puts JSON.dump(wrapper.load_language_stats)
  when "clear"
    wrapper.clear_language_stats
  when "disable"
    wrapper.disable_language_stats
  else
    $stderr.print(parser.help)
    exit 1
  end
end

git_linguist(ARGV)

View File

@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
s.license = "MIT"
s.files = Dir['lib/**/*'] - ['lib/linguist/grammars.rb']
s.executables << 'linguist'
s.executables = ['linguist', 'git-linguist']
s.add_dependency 'charlock_holmes', '~> 0.7.3'
s.add_dependency 'escape_utils', '~> 1.1.0'

View File

@@ -241,22 +241,26 @@ module Linguist
return lines[0].include?("Code generated by")
end
PROTOBUF_EXTENSIONS = ['.py', '.java', '.h', '.cc', '.cpp']
# Internal: Is the blob a C++, Java or Python source file generated by the
# Protocol Buffer compiler?
#
# Returns true or false.
def generated_protocol_buffer?
# NOTE(review): the next two guards check the same list of extensions (the
# literal array and PROTOBUF_EXTENSIONS hold identical values). They look
# like the removed and the added line of a diff hunk shown together —
# confirm that only the constant-based guard is live.
return false unless ['.py', '.java', '.h', '.cc', '.cpp'].include?(extname)
return false unless PROTOBUF_EXTENSIONS.include?(extname)
# Require more than one line before inspecting the header.
return false unless lines.count > 1
# The generator banner is looked for on the first line only.
return lines[0].include?("Generated by the protocol buffer compiler. DO NOT EDIT!")
end
APACHE_THRIFT_EXTENSIONS = ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp']
# Internal: Is the blob generated by Apache Thrift compiler?
#
# Returns true or false
def generated_apache_thrift?
return false unless ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp'].include?(extname)
return false unless APACHE_THRIFT_EXTENSIONS.include?(extname)
return false unless lines.count > 1
return lines[0].include?("Autogenerated by Thrift Compiler") || lines[1].include?("Autogenerated by Thrift Compiler")

View File

@@ -56,7 +56,8 @@ module Linguist
# Internal: Check if this heuristic matches the candidate languages.
def matches?(filename)
# NOTE(review): the result of this expression is discarded. It looks like
# the pre-change line of a diff hunk, superseded by the two lines below —
# confirm that it is not part of the live method.
@extensions.any? { |ext| filename.downcase.end_with?(ext) }
# Downcase the filename once, rather than once per extension tested.
filename = filename.downcase
# True when the downcased filename ends with any of @extensions.
@extensions.any? { |ext| filename.end_with?(ext) }
end
# Internal: Perform the heuristic

View File

@@ -79,6 +79,10 @@ module Linguist
@size
end
# Empties @data in place when it is set; does nothing otherwise.
#
# Returns the result of @data.clear, or nil when @data is unset.
def cleanup!
  return unless @data

  @data.clear
end
protected
# Returns true if the attribute is present and not the string "false".

View File

@@ -157,8 +157,11 @@ module Linguist
blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8))
next unless blob.include_in_language_stats?
file_map[new] = [blob.language.group.name, blob.size]
if blob.include_in_language_stats?
file_map[new] = [blob.language.group.name, blob.size]
end
blob.cleanup!
end
end

View File

@@ -1,3 +1,3 @@
module Linguist
# Library version string.
# NOTE(review): VERSION is assigned twice below; this looks like the removed
# ("4.5.15") and added ("4.6.0") lines of a version-bump diff shown together —
# confirm that only one assignment exists in the real file.
VERSION = "4.5.15"
VERSION = "4.6.0"
end