mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 09:40:21 +00:00
Exclude documentation files from language statistics
Documentation is an important part of a software project but is not generally thought of as part of the code for that project. Repository language statistics are used to quantify the project's code, so it makes sense to exclude documentation from those computations. Documentation files are recognized similarly to vendored files. lib/linguist/documentation.yml contains regular expressions to match common names for documentation files. A new linguist-documentation Git attribute can be used to override those conventions.
This commit is contained in:
12
README.md
12
README.md
@@ -26,7 +26,7 @@ Linguist supports a number of different custom overrides strategies for language
|
||||
|
||||
### Using gitattributes
|
||||
|
||||
Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override to set `linguist-language` and `linguist-vendored`.
|
||||
Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override to set `linguist-documentation`, `linguist-language`, and `linguist-vendored`.
|
||||
|
||||
```
|
||||
$ cat .gitattributes
|
||||
@@ -43,6 +43,16 @@ special-vendored-path/* linguist-vendored
|
||||
jquery.js linguist-vendored=false
|
||||
```
|
||||
|
||||
Similar to vendored files, Linguist excludes documentation files from your project's language stats. [lib/linguist/documentation.yml](lib/linguist/documentation.yml) lists the common documentation paths that are excluded from the language statistics for your repository.
|
||||
|
||||
Use the `linguist-documentation` attribute to mark or unmark paths as documentation.
|
||||
|
||||
```
|
||||
$ cat .gitattributes
|
||||
project-docs/* linguist-documentation
|
||||
docs/formatter.rb linguist-documentation=false
|
||||
```
|
||||
|
||||
### Using Emacs and Vim modelines
|
||||
|
||||
Alternatively, you can use Vim and Emacs style modelines to set the language for a single file. Modelines can be placed anywhere within a file and are respected when determining how to syntax-highlight a file on GitHub.com.
|
||||
|
||||
@@ -236,6 +236,21 @@ module Linguist
|
||||
name =~ VendoredRegexp ? true : false
|
||||
end
|
||||
|
||||
documentation_paths = YAML.load_file(File.expand_path("../documentation.yml", __FILE__))
|
||||
DocumentationRegexp = Regexp.new(documentation_paths.join('|'))
|
||||
|
||||
# Public: Is the blob a documentation file or in a documentation directory?
|
||||
#
|
||||
# Documentation files are ignored by language statistics.
|
||||
#
|
||||
# See "documentation.yml" for a list of documentation conventions that match
|
||||
# this pattern.
|
||||
#
|
||||
# Return true or false
|
||||
def documentation?
|
||||
name =~ DocumentationRegexp ? true : false
|
||||
end
|
||||
|
||||
# Public: Get each line of data
|
||||
#
|
||||
# Requires Blob#data
|
||||
|
||||
18
lib/linguist/documentation.yml
Normal file
18
lib/linguist/documentation.yml
Normal file
@@ -0,0 +1,18 @@
|
||||
# Documentation files and directories are excluded from language
|
||||
# statistics.
|
||||
#
|
||||
# Lines in this file are Regexps that are matched against the file
|
||||
# pathname.
|
||||
#
|
||||
# Please add additional test coverage to
|
||||
# `test/test_blob.rb#test_documentation` if you make any changes.
|
||||
|
||||
## Documentation Conventions ##
|
||||
|
||||
- ^docs?/
|
||||
- ^Documentation/
|
||||
|
||||
- (^|/)CONTRIBUTING(\.|$)
|
||||
- (^|/)COPYING(\.|$)
|
||||
- (^|/)LICEN[CS]E(\.|$)
|
||||
- (^|/)README(\.|$)
|
||||
@@ -4,7 +4,7 @@ require 'rugged'
|
||||
|
||||
module Linguist
|
||||
class LazyBlob
|
||||
GIT_ATTR = ['linguist-language', 'linguist-vendored']
|
||||
GIT_ATTR = ['linguist-documentation', 'linguist-language', 'linguist-vendored']
|
||||
GIT_ATTR_OPTS = { :priority => [:index], :skip_system => true }
|
||||
GIT_ATTR_FLAGS = Rugged::Repository::Attributes.parse_opts(GIT_ATTR_OPTS)
|
||||
|
||||
@@ -37,6 +37,14 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
def documentation?
|
||||
if attr = git_attributes['linguist-documentation']
|
||||
boolean_attribute(attr)
|
||||
else
|
||||
super
|
||||
end
|
||||
end
|
||||
|
||||
def language
|
||||
return @language if defined?(@language)
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ module Linguist
|
||||
blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode.to_s(8))
|
||||
|
||||
# Skip vendored, documentation, or generated blobs
|
||||
next if blob.vendored? || blob.generated? || blob.language.nil?
|
||||
next if blob.vendored? || blob.documentation? || blob.generated? || blob.language.nil?
|
||||
|
||||
if DETECTABLE_TYPES.include?(blob.language.type)
|
||||
file_map[new] = [blob.language.group.name, blob.size]
|
||||
|
||||
@@ -441,6 +441,38 @@ class TestBlob < Minitest::Test
|
||||
assert sample_blob("subproject/activator.bat").vendored?
|
||||
end
|
||||
|
||||
def test_documentation
|
||||
assert_predicate fixture_blob("doc/foo.md"), :documentation?
|
||||
assert_predicate fixture_blob("docs/foo.md"), :documentation?
|
||||
refute_predicate fixture_blob("project/doc/foo.md"), :documentation?
|
||||
refute_predicate fixture_blob("project/docs/foo.md"), :documentation?
|
||||
|
||||
assert_predicate fixture_blob("Documentation/foo.md"), :documentation?
|
||||
refute_predicate fixture_blob("project/Documentation/foo.md"), :documentation?
|
||||
|
||||
assert_predicate fixture_blob("README"), :documentation?
|
||||
assert_predicate fixture_blob("README.md"), :documentation?
|
||||
assert_predicate fixture_blob("README.txt"), :documentation?
|
||||
assert_predicate fixture_blob("foo/README"), :documentation?
|
||||
|
||||
assert_predicate fixture_blob("CONTRIBUTING"), :documentation?
|
||||
assert_predicate fixture_blob("CONTRIBUTING.md"), :documentation?
|
||||
assert_predicate fixture_blob("CONTRIBUTING.txt"), :documentation?
|
||||
assert_predicate fixture_blob("foo/CONTRIBUTING"), :documentation?
|
||||
|
||||
assert_predicate fixture_blob("LICENSE"), :documentation?
|
||||
assert_predicate fixture_blob("LICENCE.md"), :documentation?
|
||||
assert_predicate fixture_blob("LICENSE.txt"), :documentation?
|
||||
assert_predicate fixture_blob("foo/LICENSE"), :documentation?
|
||||
|
||||
assert_predicate fixture_blob("COPYING"), :documentation?
|
||||
assert_predicate fixture_blob("COPYING.md"), :documentation?
|
||||
assert_predicate fixture_blob("COPYING.txt"), :documentation?
|
||||
assert_predicate fixture_blob("foo/COPYING"), :documentation?
|
||||
|
||||
refute_predicate fixture_blob("foo.md"), :documentation?
|
||||
end
|
||||
|
||||
def test_language
|
||||
Samples.each do |sample|
|
||||
blob = sample_blob(sample[:path])
|
||||
|
||||
@@ -99,4 +99,16 @@ class TestRepository < Minitest::Test
|
||||
# overridden by .gitattributes
|
||||
assert !override_unvendored.vendored?
|
||||
end
|
||||
|
||||
def test_linguist_override_documentation?
|
||||
attr_commit = "d4c8fb8a28e91f97a7e53428a365c0abbac36d3d"
|
||||
repo = linguist_repo(attr_commit).read_index
|
||||
|
||||
readme = Linguist::LazyBlob.new(rugged_repository, attr_commit, "README.md")
|
||||
arduino = Linguist::LazyBlob.new(rugged_repository, attr_commit, "samples/Arduino/hello.ino")
|
||||
|
||||
# overridden by .gitattributes
|
||||
refute_predicate readme, :documentation?
|
||||
assert_predicate arduino, :documentation?
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user