diff --git a/README.md b/README.md index 4a7ea415..6f8a3993 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Linguist supports a number of different custom overrides strategies for language ### Using gitattributes -Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override to set `linguist-language` and `linguist-vendored`. +Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override to set `linguist-documentation`, `linguist-language`, and `linguist-vendored`. ``` $ cat .gitattributes @@ -43,6 +43,16 @@ special-vendored-path/* linguist-vendored jquery.js linguist-vendored=false ``` +Similar to vendored files, Linguist excludes documentation files from your project's language stats. [lib/linguist/documentation.yml](lib/linguist/documentation.yml) lists common documentation paths and excludes them from the language statistics for your repository. + +Use the `linguist-documentation` attribute to mark or unmark paths as documentation. + +``` +$ cat .gitattributes +project-docs/* linguist-documentation +docs/formatter.rb linguist-documentation=false +``` + ### Using Emacs and Vim modelines Alternatively, you can use Vim and Emacs style modelines to set the language for a single file. Modelines can be placed anywhere within a file and are respected when determining how to syntax-highlight a file on GitHub.com diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index c368b4d0..ff11aefd 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -236,6 +236,21 @@ module Linguist name =~ VendoredRegexp ? true : false end + documentation_paths = YAML.load_file(File.expand_path("../documentation.yml", __FILE__)) + DocumentationRegexp = Regexp.new(documentation_paths.join('|')) + + # Public: Is the blob a documentation file or in a documentation directory? + # + # Documentation files are ignored by language statistics.
+ # + # See "documentation.yml" for a list of documentation conventions that match + # this pattern. + # + # Return true or false + def documentation? + name =~ DocumentationRegexp ? true : false + end + # Public: Get each line of data # # Requires Blob#data diff --git a/lib/linguist/documentation.yml b/lib/linguist/documentation.yml new file mode 100644 index 00000000..57fc6151 --- /dev/null +++ b/lib/linguist/documentation.yml @@ -0,0 +1,18 @@ +# Documentation files and directories are excluded from language +# statistics. +# +# Lines in this file are Regexps that are matched against the file +# pathname. +# +# Please add additional test coverage to +# `test/test_blob.rb#test_documentation` if you make any changes. + +## Documentation Conventions ## + +- ^docs?/ +- ^Documentation/ + +- (^|/)CONTRIBUTING(\.|$) +- (^|/)COPYING(\.|$) +- (^|/)LICEN[CS]E(\.|$) +- (^|/)README(\.|$) diff --git a/lib/linguist/lazy_blob.rb b/lib/linguist/lazy_blob.rb index 9691bca5..5465a71f 100644 --- a/lib/linguist/lazy_blob.rb +++ b/lib/linguist/lazy_blob.rb @@ -4,7 +4,7 @@ require 'rugged' module Linguist class LazyBlob - GIT_ATTR = ['linguist-language', 'linguist-vendored'] + GIT_ATTR = ['linguist-documentation', 'linguist-language', 'linguist-vendored'] GIT_ATTR_OPTS = { :priority => [:index], :skip_system => true } GIT_ATTR_FLAGS = Rugged::Repository::Attributes.parse_opts(GIT_ATTR_OPTS) @@ -37,6 +37,14 @@ module Linguist end end + def documentation? + if attr = git_attributes['linguist-documentation'] + boolean_attribute(attr) + else + super + end + end + def language return @language if defined?(@language) diff --git a/lib/linguist/repository.rb b/lib/linguist/repository.rb index 3c197fad..3837977f 100644 --- a/lib/linguist/repository.rb +++ b/lib/linguist/repository.rb @@ -159,7 +159,7 @@ module Linguist blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8)) - # Skip vendored or generated blobs + # Skip vendored, documentation, or generated blobs - next if blob.vendored? || blob.generated?
|| blob.language.nil? + next if blob.vendored? || blob.documentation? || blob.generated? || blob.language.nil? if DETECTABLE_TYPES.include?(blob.language.type) file_map[new] = [blob.language.group.name, blob.size] diff --git a/test/test_blob.rb b/test/test_blob.rb index ceb54bb3..fabd5a74 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -441,6 +441,38 @@ class TestBlob < Minitest::Test assert sample_blob("subproject/activator.bat").vendored? end + def test_documentation + assert_predicate fixture_blob("doc/foo.md"), :documentation? + assert_predicate fixture_blob("docs/foo.md"), :documentation? + refute_predicate fixture_blob("project/doc/foo.md"), :documentation? + refute_predicate fixture_blob("project/docs/foo.md"), :documentation? + + assert_predicate fixture_blob("Documentation/foo.md"), :documentation? + refute_predicate fixture_blob("project/Documentation/foo.md"), :documentation? + + assert_predicate fixture_blob("README"), :documentation? + assert_predicate fixture_blob("README.md"), :documentation? + assert_predicate fixture_blob("README.txt"), :documentation? + assert_predicate fixture_blob("foo/README"), :documentation? + + assert_predicate fixture_blob("CONTRIBUTING"), :documentation? + assert_predicate fixture_blob("CONTRIBUTING.md"), :documentation? + assert_predicate fixture_blob("CONTRIBUTING.txt"), :documentation? + assert_predicate fixture_blob("foo/CONTRIBUTING"), :documentation? + + assert_predicate fixture_blob("LICENSE"), :documentation? + assert_predicate fixture_blob("LICENCE.md"), :documentation? + assert_predicate fixture_blob("LICENSE.txt"), :documentation? + assert_predicate fixture_blob("foo/LICENSE"), :documentation? + + assert_predicate fixture_blob("COPYING"), :documentation? + assert_predicate fixture_blob("COPYING.md"), :documentation? + assert_predicate fixture_blob("COPYING.txt"), :documentation? + assert_predicate fixture_blob("foo/COPYING"), :documentation? + + refute_predicate fixture_blob("foo.md"), :documentation? 
+ end + def test_language Samples.each do |sample| blob = sample_blob(sample[:path]) diff --git a/test/test_repository.rb b/test/test_repository.rb index b661668d..fcdd4f0c 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -99,4 +99,16 @@ class TestRepository < Minitest::Test # overridden .gitattributes assert !override_unvendored.vendored? end + + def test_linguist_override_documentation? + attr_commit = "d4c8fb8a28e91f97a7e53428a365c0abbac36d3d" + repo = linguist_repo(attr_commit).read_index + + readme = Linguist::LazyBlob.new(rugged_repository, attr_commit, "README.md") + arduino = Linguist::LazyBlob.new(rugged_repository, attr_commit, "samples/Arduino/hello.ino") + + # overridden by .gitattributes + refute_predicate readme, :documentation? + assert_predicate arduino, :documentation? + end end