mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			173 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			173 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| require 'linguist/lazy_blob'
 | |
| require 'rugged'
 | |
| 
 | |
| module Linguist
 | |
|   # A Repository is an abstraction of a Grit::Repo or a basic file
 | |
|   # system tree. It holds a list of paths pointing to Blobish objects.
 | |
|   #
 | |
|   # Its primary purpose is for gathering language statistics across
 | |
|   # the entire project.
 | |
|   class Repository
 | |
|     attr_reader :repository
 | |
| 
 | |
|     # Public: Create a new Repository based on the stats of
 | |
|     # an existing one
 | |
|     def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
 | |
|       repo = self.new(repo, commit_oid)
 | |
|       repo.load_existing_stats(old_commit_oid, old_stats)
 | |
|       repo
 | |
|     end
 | |
| 
 | |
|     # Public: Initialize a new Repository to be analyzed for language
 | |
|     # data
 | |
|     #
 | |
|     # repo - a Rugged::Repository object
 | |
|     # commit_oid - the sha1 of the commit that will be analyzed;
 | |
|     #              this is usually the master branch
 | |
|     #
 | |
|     # Returns a Repository
 | |
|     def initialize(repo, commit_oid)
 | |
|       @repository = repo
 | |
|       @commit_oid = commit_oid
 | |
| 
 | |
|       raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
 | |
|     end
 | |
| 
 | |
|     # Public: Load the results of a previous analysis on this repository
 | |
|     # to speed up the new scan.
 | |
|     #
 | |
|     # The new analysis will be performed incrementally as to only take
 | |
|     # into account the file changes since the last time the repository
 | |
|     # was scanned
 | |
|     #
 | |
|     # old_commit_oid - the sha1 of the commit that was previously analyzed
 | |
|     # old_stats - the result of the previous analysis, obtained by calling
 | |
|     #             Repository#cache on the old repository
 | |
|     #
 | |
|     # Returns nothing
 | |
|     def load_existing_stats(old_commit_oid, old_stats)
 | |
|       @old_commit_oid = old_commit_oid
 | |
|       @old_stats = old_stats
 | |
|       nil
 | |
|     end
 | |
| 
 | |
|     # Public: Returns a breakdown of language stats.
 | |
|     #
 | |
|     # Examples
 | |
|     #
 | |
|     #   # => { 'Ruby' => 46319,
 | |
|     #          'JavaScript' => 258 }
 | |
|     #
 | |
|     # Returns a Hash of language names and Integer size values.
 | |
|     def languages
 | |
|       @sizes ||= begin
 | |
|         sizes = Hash.new { 0 }
 | |
|         cache.each do |_, (language, size)|
 | |
|           sizes[language] += size
 | |
|         end
 | |
|         sizes
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Public: Get primary Language of repository.
 | |
|     #
 | |
|     # Returns a language name
 | |
|     def language
 | |
|       @language ||= begin
 | |
|         primary = languages.max_by { |(_, size)| size }
 | |
|         primary && primary[0]
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Public: Get the total size of the repository.
 | |
|     #
 | |
|     # Returns a byte size Integer
 | |
|     def size
 | |
|       @size ||= languages.inject(0) { |s,(_,v)| s + v }
 | |
|     end
 | |
| 
 | |
|     # Public: Return the language breakdown of this repository by file
 | |
|     #
 | |
|     # Returns a map of language names => [filenames...]
 | |
|     def breakdown_by_file
 | |
|       @file_breakdown ||= begin
 | |
|         breakdown = Hash.new { |h,k| h[k] = Array.new }
 | |
|         cache.each do |filename, (language, _)|
 | |
|           breakdown[language] << filename
 | |
|         end
 | |
|         breakdown
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Public: Return the cached results of the analysis
 | |
|     #
 | |
|     # This is a per-file breakdown that can be passed to other instances
 | |
|     # of Linguist::Repository to perform incremental scans
 | |
|     #
 | |
|     # Returns a map of filename => [language, size]
 | |
|     def cache
 | |
|       @cache ||= begin
 | |
|         if @old_commit_oid == @commit_oid
 | |
|           @old_stats
 | |
|         else
 | |
|           compute_stats(@old_commit_oid, @old_stats)
 | |
|         end
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     def read_index
 | |
|       attr_index = Rugged::Index.new
 | |
|       attr_index.read_tree(current_tree)
 | |
|       repository.index = attr_index
 | |
|     end
 | |
| 
 | |
|     def current_tree
 | |
|       @tree ||= Rugged::Commit.lookup(repository, @commit_oid).tree
 | |
|     end
 | |
| 
 | |
|     protected
 | |
| 
 | |
|     def compute_stats(old_commit_oid, cache = nil)
 | |
|       old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
 | |
| 
 | |
|       read_index
 | |
| 
 | |
|       diff = Rugged::Tree.diff(repository, old_tree, current_tree)
 | |
| 
 | |
|       # Clear file map and fetch full diff if any .gitattributes files are changed
 | |
|       if cache && diff.each_delta.any? { |delta| File.basename(delta.new_file[:path]) == ".gitattributes" }
 | |
|         diff = Rugged::Tree.diff(repository, old_tree = nil, current_tree)
 | |
|         file_map = {}
 | |
|       else
 | |
|         file_map = cache ? cache.dup : {}
 | |
|       end
 | |
| 
 | |
|       diff.each_delta do |delta|
 | |
|         old = delta.old_file[:path]
 | |
|         new = delta.new_file[:path]
 | |
| 
 | |
|         file_map.delete(old)
 | |
|         next if delta.binary
 | |
| 
 | |
|         if [:added, :modified].include? delta.status
 | |
|           # Skip submodules
 | |
|           mode = delta.new_file[:mode]
 | |
|           next if (mode & 040000) != 0
 | |
| 
 | |
|           blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8))
 | |
| 
 | |
|           # Skip vendored or generated blobs
 | |
|           next if blob.vendored? || blob.generated? || blob.language.nil?
 | |
| 
 | |
|           # Only include programming languages and acceptable markup languages
 | |
|           if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
 | |
|             file_map[new] = [blob.language.group.name, blob.size]
 | |
|           end
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       file_map
 | |
|     end
 | |
|   end
 | |
| end
 |