mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
Merge pull request #2630 from github/vmg/memory
4.6.0: Reduce memory pressure
This commit is contained in:
141
bin/git-linguist
Executable file
141
bin/git-linguist
Executable file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require 'linguist'
|
||||
require 'rugged'
|
||||
require 'optparse'
|
||||
require 'json'
|
||||
require 'tmpdir'
|
||||
require 'zlib'
|
||||
|
||||
# Runs Linguist language analysis for one commit of a Git repository and
# persists the results in a cache file stored inside the repository directory,
# so that later runs can start from the previously computed stats.
class GitLinguist
  # Commit OID written to the cache to mark the language stats as frozen.
  NULL_OID = ("0" * 40).freeze

  # Name of the cache file, relative to the repository path.
  LANGUAGE_STATS_CACHE = 'language-stats.cache'

  # Tag stored with every cache entry; a cache carrying a different tag is
  # ignored by #load_language_stats.
  LANGUAGE_STATS_CACHE_VERSION = "v3:#{Linguist::VERSION}"

  attr_reader :repo_path
  attr_reader :commit_oid
  attr_reader :incremental

  # path        - Path handed to Rugged::Repository.bare.
  # commit_oid  - OID of the commit to analyse; when nil, the target of the
  #               repository HEAD is used.
  # incremental - When true (the default), reuse previously cached stats.
  def initialize(path, commit_oid, incremental = true)
    @repo_path = path
    @commit_oid = commit_oid || rugged.head.target_id
    @incremental = incremental
  end

  # Yields a Linguist::Repository for the configured commit, then stores the
  # repository's cache on disk.
  #
  # Returns the value of the block. When the cache is frozen (stored with
  # NULL_OID) the cached stats are returned instead and the block is never
  # called.
  def linguist
    repo = Linguist::Repository.new(rugged, commit_oid)

    if incremental && (stats = load_language_stats)
      old_commit_oid, old_stats = stats

      # A cache with the NULL oid means that we want to freeze
      # these language stats in place and stop computing
      # them (for performance reasons)
      return old_stats if old_commit_oid == NULL_OID
      repo.load_existing_stats(old_commit_oid, old_stats)
    end

    result = yield repo

    save_language_stats(commit_oid, repo.cache)
    result
  end

  # Returns [commit_oid, stats] from the on-disk cache, or nil when the cache
  # is missing, unreadable, incomplete or tagged with another cache version.
  def load_language_stats
    version, commit_oid, stats = load_cache
    if version == LANGUAGE_STATS_CACHE_VERSION && commit_oid && stats
      [commit_oid, stats]
    end
  end

  # Writes the given stats to the cache, tagged with the current cache version
  # and the commit they were computed for.
  def save_language_stats(commit_oid, stats)
    cache = [LANGUAGE_STATS_CACHE_VERSION, commit_oid, stats]
    write_cache(cache)
  end

  # Deletes the cache file. A cache that does not exist is already "cleared",
  # so a missing file is not treated as an error.
  def clear_language_stats
    File.unlink(cache_file)
  rescue Errno::ENOENT
    nil
  end

  # Freezes the stats: stores an empty cache under NULL_OID, which makes
  # #linguist return it as-is instead of computing anything.
  def disable_language_stats
    save_language_stats(NULL_OID, {})
  end

  protected

  # Lazily opened handle on the repository at repo_path.
  def rugged
    @rugged ||= Rugged::Repository.bare(repo_path)
  end

  # Full path of the cache file.
  def cache_file
    File.join(repo_path, LANGUAGE_STATS_CACHE)
  end

  # Serialises the object (Marshal + zlib deflate) into a temporary file next
  # to the cache and renames it over the cache file, so readers never see a
  # partially written cache.
  def write_cache(object)
    payload = Zlib::Deflate.deflate(Marshal.dump(object))

    # Dir::Tmpname.make_tmpname was removed in Ruby 2.5. Dir::Tmpname.create
    # builds an equivalent unique name; the temporary file is kept in the
    # repository directory so the rename below stays on one filesystem.
    tmp_path = Dir::Tmpname.create(LANGUAGE_STATS_CACHE, repo_path) { |path| path }

    File.open(tmp_path, "wb") { |f| f.write(payload) }

    File.rename(tmp_path, cache_file)
    tmp_path = nil
  ensure
    # Only reached with a path when the write or rename failed: remove the
    # leftover temporary file, ignoring filesystem errors (best effort).
    if tmp_path
      begin
        File.unlink(tmp_path)
      rescue SystemCallError
        nil
      end
    end
  end

  # Reads and deserialises the cache file.
  #
  # Returns the cached object, or nil when the file cannot be read, is not
  # valid zlib data, or is not a loadable Marshal dump (including truncated
  # dumps, which Marshal reports as ArgumentError).
  #
  # NOTE(review): Marshal.load is only safe on trusted data; this assumes the
  # cache file inside the repository directory is written solely by this tool.
  def load_cache
    marshal = File.open(cache_file, "rb") { |f| Zlib::Inflate.inflate(f.read) }
    Marshal.load(marshal)
  rescue SystemCallError, ::Zlib::DataError, ::Zlib::BufError, TypeError, ArgumentError
    nil
  end
end
|
||||
|
||||
|
||||
# Command-line entry point.
#
# args - Array of command-line arguments (options followed by a command).
#        Parsed options are removed from it and the command is taken from its
#        last element.
#
# Supported commands: stats, breakdown, dump-cache, clear, disable.
#
# Prints the usage text to stderr and exits with status 1 when the options
# cannot be parsed or the command is missing/unknown. The command is checked
# before the repository is opened, so the usage text is available even when
# the current directory is not a Git repository.
def git_linguist(args)
  incremental = true
  commit = nil
  git_dir = nil

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: git-linguist [OPTIONS] stats|breakdown|dump-cache|clear|disable"

    opts.on("-f", "--force", "Force a full rescan") { incremental = false }
    opts.on("--git-dir=DIR", "Path to the git repository") { |v| git_dir = v }
    opts.on("--commit=COMMIT", "Commit to index") { |v| commit = v }
  end

  begin
    parser.parse!(args)
  rescue OptionParser::ParseError => e
    # Unknown option or missing option argument: report it with the usage
    # text instead of letting the exception surface as a backtrace.
    $stderr.puts(e.message)
    $stderr.print(parser.help)
    exit 1
  end

  command = args.pop
  unless %w[stats breakdown dump-cache clear disable].include?(command)
    $stderr.print(parser.help)
    exit 1
  end

  # Default to the .git directory of the working directory when there is one,
  # otherwise to the working directory itself.
  git_dir ||= begin
    pwd = Dir.pwd
    dotgit = File.join(pwd, ".git")
    File.directory?(dotgit) ? dotgit : pwd
  end

  wrapper = GitLinguist.new(git_dir, commit, incremental)

  case command
  when "stats"
    wrapper.linguist do |linguist|
      puts JSON.dump(linguist.languages)
    end
  when "breakdown"
    wrapper.linguist do |linguist|
      puts JSON.dump(linguist.breakdown_by_file)
    end
  when "dump-cache"
    puts JSON.dump(wrapper.load_language_stats)
  when "clear"
    wrapper.clear_language_stats
  when "disable"
    wrapper.disable_language_stats
  end
end
|
||||
|
||||
git_linguist(ARGV)
|
||||
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
|
||||
s.license = "MIT"
|
||||
|
||||
s.files = Dir['lib/**/*'] - ['lib/linguist/grammars.rb']
|
||||
s.executables << 'linguist'
|
||||
s.executables = ['linguist', 'git-linguist']
|
||||
|
||||
s.add_dependency 'charlock_holmes', '~> 0.7.3'
|
||||
s.add_dependency 'escape_utils', '~> 1.1.0'
|
||||
|
||||
@@ -241,22 +241,26 @@ module Linguist
|
||||
return lines[0].include?("Code generated by")
|
||||
end
|
||||
|
||||
# File extensions checked by generated_protocol_buffer?. Frozen so the shared
# list cannot be modified by accident.
PROTOBUF_EXTENSIONS = ['.py', '.java', '.h', '.cc', '.cpp'].freeze

# Internal: Is the blob a C++, Java or Python source file generated by the
# Protocol Buffer compiler?
#
# The blob qualifies when its extension is listed in PROTOBUF_EXTENSIONS, it
# has more than one line, and its first line carries the compiler's banner.
#
# Returns true or false.
def generated_protocol_buffer?
  return false unless PROTOBUF_EXTENSIONS.include?(extname)
  return false unless lines.count > 1

  lines[0].include?("Generated by the protocol buffer compiler. DO NOT EDIT!")
end
|
||||
|
||||
APACHE_THRIFT_EXTENSIONS = ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp']
|
||||
|
||||
# Internal: Is the blob generated by Apache Thrift compiler?
|
||||
#
|
||||
# Returns true or false
|
||||
def generated_apache_thrift?
|
||||
return false unless ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp'].include?(extname)
|
||||
return false unless APACHE_THRIFT_EXTENSIONS.include?(extname)
|
||||
return false unless lines.count > 1
|
||||
|
||||
return lines[0].include?("Autogenerated by Thrift Compiler") || lines[1].include?("Autogenerated by Thrift Compiler")
|
||||
|
||||
@@ -56,7 +56,8 @@ module Linguist
|
||||
|
||||
# Internal: Check if this heuristic applies to the given filename, i.e. the
# filename ends with one of this heuristic's extensions.
#
# filename - String file name; compared case-insensitively by downcasing it
#            once up front rather than once per extension.
#            NOTE(review): the extensions themselves are compared as stored,
#            so they are presumably lowercase - confirm at the definition site.
#
# Returns true or false.
def matches?(filename)
  filename = filename.downcase
  @extensions.any? { |ext| filename.end_with?(ext) }
end
|
||||
|
||||
# Internal: Perform the heuristic
|
||||
|
||||
@@ -79,6 +79,10 @@ module Linguist
|
||||
@size
|
||||
end
|
||||
|
||||
# Empties the loaded data in place (via #clear) so the memory it holds can be
# reclaimed. Does nothing when no data has been loaded.
#
# Returns the cleared data object, or nil when there was nothing to clear.
def cleanup!
  return unless @data

  @data.clear
end
|
||||
|
||||
protected
|
||||
|
||||
# Returns true if the attribute is present and not the string "false".
|
||||
|
||||
@@ -157,8 +157,11 @@ module Linguist
|
||||
|
||||
blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8))
|
||||
|
||||
next unless blob.include_in_language_stats?
|
||||
file_map[new] = [blob.language.group.name, blob.size]
|
||||
if blob.include_in_language_stats?
|
||||
file_map[new] = [blob.language.group.name, blob.size]
|
||||
end
|
||||
|
||||
blob.cleanup!
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
module Linguist
  # Version of the Linguist library.
  VERSION = "4.6.0"
end
|
||||
|
||||
Reference in New Issue
Block a user