From 8da6ddf9d97ee1cd1d7119f3e8a5249df7d1590a Mon Sep 17 00:00:00 2001 From: Seppe Stas Date: Tue, 23 Jan 2018 13:17:48 +0100 Subject: [PATCH] Override languages being included by language statistics (#3807) * Add detectable key to languages This key allows to override the language being included in the language stats of a repository. * Make detectable override-able using .gitattributes * Mention `linguist-detectable` in README * Remove detectable key from languages Reverts changes in 0f7c0df5. * Update commit hash to the one that was merged PR #3806 changed the commit hash. The original commit was not actually merged into the test/attributes branch. * Fix check to ensure detectable is defined * Add include in language stats tests when detectable set * Ignore detectable when vendored, documentation or overridden * Add documentation on detectable override in README * Improve documentation on detectable override in README --- README.md | 15 ++++++++++++++- lib/linguist/blob_helper.rb | 5 ++++- lib/linguist/lazy_blob.rb | 11 ++++++++++- test/test_blob.rb | 31 +++++++++++++++++++++++++++++++ test/test_repository.rb | 12 ++++++++++++ 5 files changed, 71 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cf39fe16..98f29069 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ Linguist supports a number of different custom override strategies for language ### Using gitattributes -Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override using the `linguist-documentation`, `linguist-language`, `linguist-vendored`, and `linguist-generated` attributes. `.gitattributes` will be used to determine language statistics and will be used to syntax highlight files. You can also manually set syntax highlighting using [Vim or Emacs modelines](#using-emacs-or-vim-modelines). 
+Add a `.gitattributes` file to your project and use standard git-style path matchers for the files you want to override using the `linguist-documentation`, `linguist-language`, `linguist-vendored`, `linguist-generated`, and `linguist-detectable` attributes. `.gitattributes` will be used to determine language statistics and will be used to syntax highlight files. You can also manually set syntax highlighting using [Vim or Emacs modelines](#using-emacs-or-vim-modelines). ``` $ cat .gitattributes @@ -166,6 +166,19 @@ $ cat .gitattributes Api.elm linguist-generated=true ``` +#### Detectable + +By default, only programming and markup languages are included in the language statistics. Languages of any other type (as defined in [`languages.yml`](/lib/linguist/languages.yml)) are not "detectable", so they are excluded from the language statistics. + +Use the `linguist-detectable` attribute to mark or unmark paths as detectable. Paths that are vendored, documentation, or generated stay excluded from the statistics even when they are marked as detectable. + +``` +$ cat .gitattributes +*.kicad_pcb linguist-detectable=true +*.sch linguist-detectable=true +tools/export_bom.py linguist-detectable=false +``` + ### Using Emacs or Vim modelines If you do not want to use `.gitattributes` to override the syntax highlighting used on GitHub.com, you can use Vim or Emacs style modelines to set the language for a single file. Modelines can be placed anywhere within a file and are respected when determining how to syntax-highlight a file on GitHub.com diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index dc2f94ee..c78dd7d0 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -383,7 +383,10 @@ module Linguist !vendored? && !documentation? && !generated? && - language && DETECTABLE_TYPES.include?(language.type) + language && ( defined?(detectable?) && !detectable?.nil? ? + detectable? 
: + DETECTABLE_TYPES.include?(language.type) + ) end end end diff --git a/lib/linguist/lazy_blob.rb b/lib/linguist/lazy_blob.rb index d93fb6f7..389cc243 100644 --- a/lib/linguist/lazy_blob.rb +++ b/lib/linguist/lazy_blob.rb @@ -7,7 +7,8 @@ module Linguist GIT_ATTR = ['linguist-documentation', 'linguist-language', 'linguist-vendored', - 'linguist-generated'] + 'linguist-generated', + 'linguist-detectable'] GIT_ATTR_OPTS = { :priority => [:index], :skip_system => true } GIT_ATTR_FLAGS = Rugged::Repository::Attributes.parse_opts(GIT_ATTR_OPTS) @@ -70,6 +71,14 @@ module Linguist end end + def detectable? + if attr = git_attributes['linguist-detectable'] + return boolean_attribute(attr) + else + nil + end + end + def data load_blob! @data diff --git a/test/test_blob.rb b/test/test_blob.rb index 844f0d2d..ca05ec4d 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -307,5 +307,36 @@ class TestBlob < Minitest::Test included = sample_blob_memory("HTML/pages.html") assert_predicate included, :include_in_language_stats? + + # Test detectable override (i.e by .gitattributes) + + def prose.detectable?; true end + assert_predicate prose, :include_in_language_stats? + + included_not_detectable = included.clone() + def included_not_detectable.detectable?; false end + refute_predicate included_not_detectable, :include_in_language_stats? + + # Test not included if vendored, documentation or generated overridden + # even if detectable + + included_vendored = included.clone() + def included_vendored.vendored?; true end + refute_predicate included_vendored, :include_in_language_stats? + def included_vendored.detectable?; true end + refute_predicate included_vendored, :include_in_language_stats? + + included_documentation = included.clone() + def included_documentation.documentation?; true end + refute_predicate included_documentation, :include_in_language_stats? 
+ def included_documentation.detectable?; true end + refute_predicate included_documentation, :include_in_language_stats? + + included_generated = included.clone() + def included_generated.generated?; true end + refute_predicate included_generated, :include_in_language_stats? + def included_generated.detectable?; true end + refute_predicate included_generated, :include_in_language_stats? + end end diff --git a/test/test_repository.rb b/test/test_repository.rb index 0a81fae1..fa73ae1b 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -121,4 +121,16 @@ class TestRepository < Minitest::Test # overridden .gitattributes assert rakefile.generated? end + + def test_linguist_override_detectable? + attr_commit = "8f86998866f6f2c8aa14e0dd430e61fd25cff720" + linguist_repo(attr_commit).read_index + + # markdown is overridden by .gitattributes to be detectable, html to not be detectable + markdown = Linguist::LazyBlob.new(rugged_repository, attr_commit, "samples/Markdown/tender.md") + html = Linguist::LazyBlob.new(rugged_repository, attr_commit, "samples/HTML/pages.html") + + assert_predicate markdown, :detectable? + refute_predicate html, :detectable? + end end