diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..e69de29b diff --git a/Rakefile b/Rakefile index 9c3aa2ee..bd5788f6 100644 --- a/Rakefile +++ b/Rakefile @@ -2,7 +2,6 @@ require 'json' require 'rake/clean' require 'rake/testtask' require 'yaml' -require 'pry' task :default => :test diff --git a/lib/linguist/lazy_blob.rb b/lib/linguist/lazy_blob.rb index c7031283..807312f9 100644 --- a/lib/linguist/lazy_blob.rb +++ b/lib/linguist/lazy_blob.rb @@ -4,7 +4,7 @@ require 'rugged' module Linguist class LazyBlob - GIT_ATTR = ['linguist-ignore', 'linguist-lang'] + GIT_ATTR = ['linguist-language', 'linguist-vendored', 'linguist-generated'] GIT_ATTR_OPTS = { :priority => [:index], :skip_system => true } GIT_ATTR_FLAGS = Rugged::Repository::Attributes.parse_opts(GIT_ATTR_OPTS) @@ -29,18 +29,30 @@ module Linguist name, GIT_ATTR, GIT_ATTR_FLAGS) end - def ignored? - !!git_attributes['linguist-ignore'] + def vendored? + if attr = git_attributes['linguist-vendored'] + return boolean_attribute(attr) + else + return super + end end - def overriden_language - if lang = git_attributes['linguist-lang'] - Language.find_by_name(lang) + def generated? + if attr = git_attributes['linguist-generated'] + return boolean_attribute(attr) + else + return super end end def language - @language ||= (overriden_language || Language.detect(self)) + return @language if defined?(@language) + + @language = if lang = git_attributes['linguist-language'] + Language.find_by_name(lang) + else + super + end end def data @@ -54,6 +66,12 @@ module Linguist end protected + + # Returns true if the attribute is present and not the string "false". + def boolean_attribute(attr) + attr != "false" + end + def load_blob! @data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil? end diff --git a/lib/linguist/repository.rb b/lib/linguist/repository.rb index 9e0b7f1e..1f9e09c4 100644 --- a/lib/linguist/repository.rb +++ b/lib/linguist/repository.rb @@ -110,22 +110,30 @@ module Linguist if @old_commit_oid == @commit_oid @old_stats else - compute_stats(@old_commit_oid, @commit_oid, @old_stats) + compute_stats(@old_commit_oid, @old_stats) end end end + def read_index + attr_index = Rugged::Index.new + attr_index.read_tree(current_tree) + repository.index = attr_index + end + + def current_tree + @tree ||= Rugged::Commit.lookup(repository, @commit_oid).tree + end + protected - def compute_stats(old_commit_oid, commit_oid, cache = nil) + + def compute_stats(old_commit_oid, cache = nil) file_map = cache ? cache.dup : {} old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree - new_tree = Rugged::Commit.lookup(repository, commit_oid).tree - diff = Rugged::Tree.diff(repository, old_tree, new_tree) + read_index - attr_index = Rugged::Index.new - attr_index.read_tree(new_tree) - repository.index = attr_index + diff = Rugged::Tree.diff(repository, old_tree, current_tree) diff.each_delta do |delta| old = delta.old_file[:path] @@ -142,7 +150,7 @@ module Linguist blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8)) # Skip vendored or generated blobs - next if blob.ignored? || blob.vendored? || blob.generated? || blob.language.nil? + next if blob.vendored? || blob.generated? || blob.language.nil? # Only include programming languages and acceptable markup languages if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name) diff --git a/test/fixtures/foo.rb b/test/fixtures/foo.rb new file mode 100644 index 00000000..799c84bd --- /dev/null +++ b/test/fixtures/foo.rb @@ -0,0 +1,3 @@ +def foo + return "BAR" +end diff --git a/test/test_repository.rb b/test/test_repository.rb index f2636871..55ba91ad 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -1,4 +1,5 @@ require 'linguist/repository' +require 'linguist/lazy_blob' require 'test/unit' class TestRepository < Test::Unit::TestCase @@ -47,13 +48,17 @@ class TestRepository < Test::Unit::TestCase assert_equal linguist_repo.cache, new_repo.cache end - def test_git_attributes - # See https://github.com/github/linguist/blob/525304738ebdb7ab3b7d2bf9a7514cc428faa273/.gitattributes + def test_repo_git_attributes + # See https://github.com/github/linguist/blob/7ee006cbcb2d7261f9e648510a684ee9ac64126b/.gitattributes # # It looks like this: - # test/*.rb linguist-ignore - # lib/linguist.rb linguist-lang=Java - attr_commit = '525304738ebdb7ab3b7d2bf9a7514cc428faa273' + # Gemfile linguist-vendored=true + # lib/linguist.rb linguist-language=Java + # test/*.rb linguist-language=Java + # Rakefile linguist-generated + # test/fixtures/* linguist-vendored=false + + attr_commit = '7ee006cbcb2d7261f9e648510a684ee9ac64126b' repo = linguist_repo(attr_commit) assert repo.breakdown_by_file.has_key?("Java") @@ -61,8 +66,36 @@ class TestRepository < Test::Unit::TestCase assert repo.breakdown_by_file.has_key?("Ruby") assert !repo.breakdown_by_file["Ruby"].empty? - repo.breakdown_by_file["Ruby"].each do |file| - assert !file.start_with?("test/") - end + end + + def test_linguist_override_generated? + attr_commit = '7ee006cbcb2d7261f9e648510a684ee9ac64126b' + linguist_repo(attr_commit).read_index + + file = Linguist::LazyBlob.new(rugged_repository, attr_commit, 'Rakefile') + + # overridden in .gitattributes + assert file.generated? + end + + def test_linguist_override_vendored? + attr_commit = '7ee006cbcb2d7261f9e648510a684ee9ac64126b' + repo = linguist_repo(attr_commit).read_index + + override_vendored = Linguist::LazyBlob.new(rugged_repository, attr_commit, 'Gemfile') + + # overridden .gitattributes + assert override_vendored.vendored? + end + + def test_linguist_override_unvendored? + attr_commit = '7ee006cbcb2d7261f9e648510a684ee9ac64126b' + repo = linguist_repo(attr_commit).read_index + + # lib/linguist/vendor.yml defines this as vendored. + override_unvendored = Linguist::LazyBlob.new(rugged_repository, attr_commit, 'test/fixtures/foo.rb') + + # overridden .gitattributes + assert !override_unvendored.vendored? end end