From ffc09a459678960b5d04af31dcad013a30ab0fe4 Mon Sep 17 00:00:00 2001 From: Henrik Hodne Date: Sun, 3 Jul 2011 12:56:15 +0200 Subject: [PATCH 01/15] Fixed a spelling error --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e0902385..f7be3c1f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ We use this library at GitHub to detect blob languages, highlight code, ignore b ### Language detection -Linguist defines the list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be hightlighed, a language and lexer must be defined there. +Linguist defines the list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be highlighted, a language and lexer must be defined there. Most languages are detected by their file extension. This is the fastest and most common situation. For script files, which are usually extensionless, we do "deep content inspection"™ and check the shebang of the file. Checking the file's contents may also be used for disambiguating languages. C, C++ and Obj-C all use `.h` files. Looking for common keywords, we are usually able to guess the correct language. From cc3692b32093a20876c898a5ab4a9aa40a15822c Mon Sep 17 00:00:00 2001 From: bilderbuchi Date: Sun, 3 Jul 2011 18:44:32 +0800 Subject: [PATCH 02/15] Add simple test fixtures for Matlab. 
--- test/fixtures/matlab_function.m | 9 +++++++++ test/fixtures/matlab_script.m | 12 ++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 test/fixtures/matlab_function.m create mode 100644 test/fixtures/matlab_script.m diff --git a/test/fixtures/matlab_function.m b/test/fixtures/matlab_function.m new file mode 100644 index 00000000..b73502e2 --- /dev/null +++ b/test/fixtures/matlab_function.m @@ -0,0 +1,9 @@ +function ret = matlab_function(A,B) +% Simple function adding two values and displaying the return value + +ret = A+B; +% Display the return value +disp('Return value in function'); +disp(ret); + + diff --git a/test/fixtures/matlab_script.m b/test/fixtures/matlab_script.m new file mode 100644 index 00000000..89f1d956 --- /dev/null +++ b/test/fixtures/matlab_script.m @@ -0,0 +1,12 @@ +% Matlab example script + +%Call matlab_function function which resides in the same directory + +value1 = 5 % semicolon at end of line is not mandatory, only suppresses output to command line. +value2 = 3 + +% Calculate sum of value1 and value2 +result = matlab_function(value1,value2); + +disp('called from script') +disp(result); From 5ecc4421d7df3dce927d698024a43e0fc1450af0 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 09:34:33 -0500 Subject: [PATCH 03/15] Basic Matlab detection Fixes #15 --- lib/linguist/blob_helper.rb | 34 ++++++++++++++++++++++++++++++++++ test/test_blob.rb | 6 ++++++ 2 files changed, 40 insertions(+) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 538fe5c6..f18a9204 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -278,6 +278,9 @@ module Linguist # If its a header file (.h) try to guess the language header_language || + # If it's a .m file, try to guess the language + m_language || + # If it's a .r file, try to guess the language r_language || @@ -310,6 +313,37 @@ module Linguist end end + # Internal: Guess language of .m files. 
+ # + # Objective-C heuristics: + # * Keywords + # + # Matlab heuristics: + # * Leading function keyword + # * "%" comments + # + # Returns a Language. + def m_language + return unless extname == '.m' + + # Objective-C keywords + if lines.grep(/^#import|@(interface|implementation|property|synthesize|end)/).any? + Language['Objective-C'] + + # File function + elsif lines.first.to_s =~ /^function / + Language['Matlab'] + + # Matlab comment + elsif lines.grep(/^%/).any? + Language['Matlab'] + + # Fallback to Objective-C, don't want any Matlab false positives + else + Language['Objective-C'] + end + end + # Internal: Guess language of .r files. # # Returns a Language. diff --git a/test/test_blob.rb b/test/test_blob.rb index 0a89031e..ff045d9c 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -234,6 +234,12 @@ class TestBlob < Test::Unit::TestCase assert_equal Language['R'], blob("hello-r.R").language assert_equal Language['Rebol'], blob("hello-rebol.r").language + # .m disambiguation + assert_equal Language['Objective-C'], blob("Foo.m").language + assert_equal Language['Objective-C'], blob("hello.m").language + assert_equal Language['Matlab'], blob("matlab_function.m").language + # assert_equal Language['Matlab'], blob("matlab_script.m").language + # ML assert_equal Language['OCaml'], blob("Foo.ml").language assert_equal Language['Standard ML'], blob("Foo.sig").language From 111ebe3c806d04283e80633deccbd941c9ec6854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C5=BDlender?= Date: Tue, 5 Jul 2011 09:43:48 -0500 Subject: [PATCH 04/15] Identify php files by first line Fixes #32 --- lib/linguist/blob_helper.rb | 17 +++++ test/fixtures/drupal.module | 140 ++++++++++++++++++++++++++++++++++++ test/test_blob.rb | 3 + 3 files changed, 160 insertions(+) create mode 100644 test/fixtures/drupal.module diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index f18a9204..5f843ac6 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb 
@@ -287,6 +287,9 @@ module Linguist # See if there is a Language for the extension pathname.language || + # Look for idioms in first line + first_line_language || + # Try to detect Language from shebang line shebang_language end @@ -357,6 +360,20 @@ module Linguist end end + # Internal: Guess language from the first line. + # + # Look for leading "' . t('About') . ''; + $output .= '

' . t('The PHP filter module adds a PHP filter to your site, for use with text formats. This filter adds the ability to execute PHP code in any text field that uses a text format (such as the body of a content item or the text of a comment). PHP is a general-purpose scripting language widely-used for web development, and is the language with which Drupal has been developed. For more information, see the online handbook entry for the PHP filter module.', array('@filter' => url('admin/help/filter'), '@php-net' => 'http://www.php.net', '@php' => 'http://drupal.org/handbook/modules/php/')) . '

'; + $output .= '

' . t('Uses') . '

'; + $output .= '
'; + $output .= '
' . t('Enabling execution of PHP in text fields') . '
'; + $output .= '
' . t('The PHP filter module allows users with the proper permissions to include custom PHP code that will get executed when pages of your site are processed. While this is a powerful and flexible feature if used by a trusted user with PHP experience, it is a significant and dangerous security risk in the hands of a malicious or inexperienced user. Even a trusted user may accidentally compromise the site by entering malformed or incorrect PHP code. Only the most trusted users should be granted permission to use the PHP filter, and all PHP code added through the PHP filter should be carefully examined before use. Example PHP snippets can be found on Drupal.org.', array('@php-snippets' => url('http://drupal.org/handbook/customization/php-snippets'))) . '
'; + $output .= '
'; + return $output; + } +} + +/** + * Implements hook_permission(). + */ +function php_permission() { + return array( + 'use PHP for settings' => array( + 'title' => t('Use PHP for settings'), + 'restrict access' => TRUE, + ), + ); +} + +/** + * Evaluate a string of PHP code. + * + * This is a wrapper around PHP's eval(). It uses output buffering to capture both + * returned and printed text. Unlike eval(), we require code to be surrounded by + * tags; in other words, we evaluate the code as if it were a stand-alone + * PHP file. + * + * Using this wrapper also ensures that the PHP code which is evaluated can not + * overwrite any variables in the calling code, unlike a regular eval() call. + * + * @param $code + * The code to evaluate. + * @return + * A string containing the printed output of the code, followed by the returned + * output of the code. + * + * @ingroup php_wrappers + */ +function php_eval($code) { + global $theme_path, $theme_info, $conf; + + // Store current theme path. + $old_theme_path = $theme_path; + + // Restore theme_path to the theme, as long as php_eval() executes, + // so code evaluated will not see the caller module as the current theme. + // If theme info is not initialized get the path from theme_default. + if (!isset($theme_info)) { + $theme_path = drupal_get_path('theme', $conf['theme_default']); + } + else { + $theme_path = dirname($theme_info->filename); + } + + ob_start(); + print eval('?>' . $code); + $output = ob_get_contents(); + ob_end_clean(); + + // Recover original theme path. + $theme_path = $old_theme_path; + + return $output; +} + +/** + * Tips callback for php filter. + */ +function _php_filter_tips($filter, $format, $long = FALSE) { + global $base_url; + if ($long) { + $output = '

' . t('Using custom PHP code') . '

'; + $output .= '

' . t('Custom PHP code may be embedded in some types of site content, including posts and blocks. While embedding PHP code inside a post or block is a powerful and flexible feature when used by a trusted user with PHP experience, it is a significant and dangerous security risk when used improperly. Even a small mistake when posting PHP code may accidentally compromise your site.') . '

'; + $output .= '

' . t('If you are unfamiliar with PHP, SQL, or Drupal, avoid using custom PHP code within posts. Experimenting with PHP may corrupt your database, render your site inoperable, or significantly compromise security.') . '

'; + $output .= '

' . t('Notes:') . '

'; + $output .= '
  • ' . t('Remember to double-check each line for syntax and logic errors before saving.') . '
  • '; + $output .= '
  • ' . t('Statements must be correctly terminated with semicolons.') . '
  • '; + $output .= '
  • ' . t('Global variables used within your PHP code retain their values after your script executes.') . '
  • '; + $output .= '
  • ' . t('register_globals is turned off. If you need to use forms, understand and use the functions in the Drupal Form API.', array('@formapi' => url('http://api.drupal.org/api/group/form_api/7'))) . '
  • '; + $output .= '
  • ' . t('Use a print or return statement in your code to output content.') . '
  • '; + $output .= '
  • ' . t('Develop and test your PHP code using a separate test script and sample database before deploying on a production site.') . '
  • '; + $output .= '
  • ' . t('Consider including your custom PHP code within a site-specific module or template.php file rather than embedding it directly into a post or block.') . '
  • '; + $output .= '
  • ' . t('Be aware that the ability to embed PHP code within content is provided by the PHP Filter module. If this module is disabled or deleted, then blocks and posts with embedded PHP may display, rather than execute, the PHP code.') . '
'; + $output .= '

' . t('A basic example: Creating a "Welcome" block that greets visitors with a simple message.') . '

'; + $output .= '
  • ' . t('

    Add a custom block to your site, named "Welcome" . With its text format set to "PHP code" (or another format supporting PHP input), add the following in the Block body:

    +
    +print t(\'Welcome visitor! Thank you for visiting.\');
    +
    ') . '
  • '; + $output .= '
  • ' . t('

    To display the name of a registered user, use this instead:

    +
    +global $user;
    +if ($user->uid) {
    +  print t(\'Welcome @name! Thank you for visiting.\', array(\'@name\' => format_username($user)));
    +}
    +else {
    +  print t(\'Welcome visitor! Thank you for visiting.\');
    +}
    +
    ') . '
'; + $output .= '

' . t('Drupal.org offers some example PHP snippets, or you can create your own with some PHP experience and knowledge of the Drupal system.', array('@drupal' => url('http://drupal.org'), '@php-snippets' => url('http://drupal.org/handbook/customization/php-snippets'))) . '

'; + return $output; + } + else { + return t('You may post PHP code. You should include <?php ?> tags.'); + } +} + +/** + * Implements hook_filter_info(). + * + * Provide PHP code filter. Use with care. + */ +function php_filter_info() { + $filters['php_code'] = array( + 'title' => t('PHP evaluator'), + 'description' => t('Executes a piece of PHP code. The usage of this filter should be restricted to administrators only!'), + 'process callback' => 'php_eval', + 'tips callback' => '_php_filter_tips', + 'cache' => FALSE, + ); + return $filters; +} + diff --git a/test/test_blob.rb b/test/test_blob.rb index ff045d9c..6c3f8757 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -278,6 +278,9 @@ class TestBlob < Test::Unit::TestCase # http://docs.racket-lang.org/scribble/ assert_equal Language['Racket'], blob("scribble.scrbl").language + + # https://github.com/drupal/drupal/blob/7.x/modules/php/php.module + assert_equal Language['PHP'], blob("drupal.module").language end def test_lexer From aaed4ee1d49578826207368a8ebaf30a82ed3171 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 09:45:20 -0500 Subject: [PATCH 05/15] Uncomment test --- test/test_blob.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_blob.rb b/test/test_blob.rb index 6c3f8757..11be67a9 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -238,7 +238,7 @@ class TestBlob < Test::Unit::TestCase assert_equal Language['Objective-C'], blob("Foo.m").language assert_equal Language['Objective-C'], blob("hello.m").language assert_equal Language['Matlab'], blob("matlab_function.m").language - # assert_equal Language['Matlab'], blob("matlab_script.m").language + assert_equal Language['Matlab'], blob("matlab_script.m").language # ML assert_equal Language['OCaml'], blob("Foo.ml").language From b3e70659721b839b8b76116f0a3338d12a5a7dca Mon Sep 17 00:00:00 2001 From: Andrei Formiga Date: Tue, 5 Jul 2011 17:33:28 -0300 Subject: [PATCH 06/15] Added detection for Prolog 
and disambiguation for .pl files --- lib/linguist/blob_helper.rb | 23 +++++++++++++++++++++++ lib/linguist/languages.yml | 6 ++++++ test/test_blob.rb | 5 +++++ test/test_language.rb | 2 ++ 4 files changed, 36 insertions(+) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 97b6fa06..a1202cb9 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -262,6 +262,9 @@ module Linguist # If it's a .r file, try to guess the language r_language || + # If it's a .pl file, try to guess the language + pl_language || + # See if there is a Language for the extension pathname.language || @@ -304,6 +307,26 @@ module Linguist end end + # Internal: Guess language of .pl files + # + # Returns a Language. + def pl_language + return unless extname == '.pl' + + # The rules for disambiguation are: + # + # 1. Many perl files begin with a shebang + # 2. Most Prolog source files have a rule somewhere (marked by the :- operator) + # 3. Default to Perl, because it is more popular + if shebang_script == 'perl' + Language['Perl'] + elsif lines.grep(/:-/).any? 
+ Language['Prolog'] + else + Language['Perl'] + end + end + # Internal: Extract the script name from the shebang line # # Requires Blob#data diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 46bbd766..eab2a7a5 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -589,6 +589,12 @@ Perl: - .perl - .psgi +Prolog: + major: true + extensions: + - .pro + - .prolog + Pure Data: major: true lexer: Text only diff --git a/test/test_blob.rb b/test/test_blob.rb index bcc63a98..9d6c9670 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -228,6 +228,11 @@ class TestBlob < Test::Unit::TestCase assert_equal Language['R'], blob("hello-r.R").language assert_equal Language['Rebol'], blob("hello-rebol.r").language + # .pl disambiguation + assert_equal Language['Prolog'], blob("test-prolog.pl").language + assert_equal Language['Perl'], blob("test-perl.pl").language + assert_equal Language['Perl'], blob("test-perl2.pl").language + # ML assert_equal Language['OCaml'], blob("Foo.ml").language assert_equal Language['Standard ML'], blob("Foo.sig").language diff --git a/test/test_language.rb b/test/test_language.rb index ffea3054..1a9f4108 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -81,6 +81,7 @@ class TestLanguage < Test::Unit::TestCase assert_equal Lexer['Ooc'], Language['ooc'].lexer assert_equal Lexer['PHP'], Language['PHP'].lexer assert_equal Lexer['Perl'], Language['Perl'].lexer + assert_equal Lexer['Prolog'], Language['Prolog'].lexer assert_equal Lexer['Python Traceback'], Language['Python traceback'].lexer assert_equal Lexer['Python'], Language['Python'].lexer assert_equal Lexer['REBOL'], Language['Rebol'].lexer @@ -310,6 +311,7 @@ class TestLanguage < Test::Unit::TestCase assert Language['Objective-J'].major? assert Language['PHP'].major? assert Language['Perl'].major? + assert Language['Prolog'].major? assert Language['Pure Data'].major? assert Language['Python'].major? assert Language['R'].major? 
From e3d0028ff3c4ceda3e6cc3b1a7749587e0697f70 Mon Sep 17 00:00:00 2001 From: "Bryce \"BonzoESC\" Kerley" Date: Tue, 5 Jul 2011 13:43:11 -0700 Subject: [PATCH 07/15] Added `.zsh` and `.bash` extensions to the Shell language definition. --- lib/linguist/languages.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 33b2f003..fa9a0a93 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -715,6 +715,8 @@ Shell: extensions: - .bash - .sh + - .zsh + - .bash filenames: - .bash_profile - .bashrc From 93330c5be3cfdf4287d6306d2aa2340299fd06b9 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 15:50:33 -0500 Subject: [PATCH 08/15] Preserve new lines for syntax highlighting --- lib/linguist/lexer.rb | 2 +- test/test_language.rb | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/linguist/lexer.rb b/lib/linguist/lexer.rb index 0bafd280..f38d730b 100644 --- a/lib/linguist/lexer.rb +++ b/lib/linguist/lexer.rb @@ -141,7 +141,7 @@ module Linguist # # Returns html String def colorize(text) - Albino.colorize(text, self) + Albino.new(text, self).colorize(:O => 'stripnl=false') end # Public: Highlight syntax of text without the outer highlight div diff --git a/test/test_language.rb b/test/test_language.rb index ffea3054..5b524021 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -457,6 +457,16 @@ Hello assert_equal <<-HTML, Language['Ruby'].colorize_without_wrapper("def foo\n 'foo'\nend\n") def foo 'foo' +end + HTML + end + + def test_colorize_doesnt_strip_newlines + assert_equal <<-HTML, Language['Ruby'].colorize_without_wrapper("\n\n# Foo\ndef 'foo'\nend\n") + + +# Foo +def 'foo' end HTML end From 4a18078c5dcecb5c11b861502d7c8513e00270ee Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 15:59:01 -0500 Subject: [PATCH 09/15] Remove duplicate .bash extension --- lib/linguist/languages.yml | 1 - 1 file changed, 1 deletion(-) 
diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index fa9a0a93..f36ee009 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -716,7 +716,6 @@ Shell: - .bash - .sh - .zsh - - .bash filenames: - .bash_profile - .bashrc From f4cfde45bd46110a247317d5fe9fca5543a1002a Mon Sep 17 00:00:00 2001 From: Andrei Formiga Date: Tue, 5 Jul 2011 18:01:44 -0300 Subject: [PATCH 10/15] Included test files for .pl disambiguation --- test/fixtures/test-perl.pl | 2 ++ test/fixtures/test-perl2.pl | 3 +++ test/fixtures/test-prolog.pl | 12 ++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 test/fixtures/test-perl.pl create mode 100644 test/fixtures/test-perl2.pl create mode 100644 test/fixtures/test-prolog.pl diff --git a/test/fixtures/test-perl.pl b/test/fixtures/test-perl.pl new file mode 100644 index 00000000..83710279 --- /dev/null +++ b/test/fixtures/test-perl.pl @@ -0,0 +1,2 @@ +#!/usr/bin/perl +print "Hello, world!\n"; diff --git a/test/fixtures/test-perl2.pl b/test/fixtures/test-perl2.pl new file mode 100644 index 00000000..55a2762c --- /dev/null +++ b/test/fixtures/test-perl2.pl @@ -0,0 +1,3 @@ + +# Perl file without shebang +print "Hello, world!\n"; diff --git a/test/fixtures/test-prolog.pl b/test/fixtures/test-prolog.pl new file mode 100644 index 00000000..aab83d54 --- /dev/null +++ b/test/fixtures/test-prolog.pl @@ -0,0 +1,12 @@ +/* Prolog test file */ +male(john). +male(peter). + +female(vick). +female(christie). + +parents(john, peter, christie). +parents(vick, peter, christie). + +/* X is a brother of Y */ +brother(X, Y) :- male(X), parents(X, F, M), parents(Y, F, M). 
From 1e453a22b5af338c6296da4d95079e090ba97b29 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 19:51:54 -0500 Subject: [PATCH 11/15] Be explicit about ambiguous extensions --- lib/linguist/language.rb | 44 ++++++++++++++++++++++++++++++-------- lib/linguist/languages.yml | 12 ++++++++++- test/test_language.rb | 15 ++++++++++++- 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 6a44e2a2..9eb4df4d 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -9,12 +9,27 @@ module Linguist # Languages are defined in `lib/linguist/languages.yml`. class Language @languages = [] + @overrides = {} @index = {} @name_index = {} @alias_index = {} @extension_index = {} @filename_index = {} + # Internal: Test if extension maps to multiple Languages. + # + # Returns true or false. + def self.ambiguous?(extension) + @overrides.include?(extension) + end + + # Include?: Return overridden extensions. + # + # Returns extensions Array. + def self.overridden_extensions + @overrides.keys + end + # Internal: Create a new Language object # # attributes - A hash of attributes @@ -47,17 +62,21 @@ module Linguist warn "Extension is missing a '.': #{extension.inspect}" end - # All Language extensions should be unique. Warn if there is a - # duplicate. 
- if @extension_index.key?(extension) - warn "Duplicate extension: #{extension}" + unless ambiguous?(extension) + # Index the extension with a leading ".": ".rb" + @extension_index[extension] = language + + # Index the extension without a leading ".": "rb" + @extension_index[extension.sub(/^\./, '')] = language + end + end + + language.overrides.each do |extension| + if extension !~ /^\./ + warn "Extension is missing a '.': #{extension.inspect}" end - # Index the extension with a leading ".": ".rb" - @extension_index[extension] = language - - # Index the extension without a leading ".": "rb" - @extension_index[extension.sub(/^\./, '')] = language + @overrides[extension] = language end language.filenames.each do |filename| @@ -191,6 +210,7 @@ module Linguist # Set extensions or default to []. @extensions = attributes[:extensions] || [] + @overrides = attributes[:overrides] || [] @filenames = attributes[:filenames] || [] # Set popular, major, and searchable flags @@ -260,6 +280,11 @@ module Linguist # Returns the extensions Array attr_reader :extensions + # Internal: Get overridden extensions. + # + # Returns the extensions Array. + attr_reader :overrides + # Public: Get filenames # # Examples @@ -381,6 +406,7 @@ module Linguist :searchable => options.key?('searchable') ? options['searchable'] : true, :search_term => options['search_term'], :extensions => options['extensions'], + :overrides => options['overrides'], :filenames => options['filenames'], :major => options['major'], :popular => popular.include?(name) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index ffae58e5..7e158b94 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -9,6 +9,7 @@ # aliases - An Array of additional aliases (implicitly # includes name.downcase) # extension - An Array of associated extensions +# overrides - An Array of extensions that takes precedence over conflicts # major - Boolean flag major programming languages. 
Please leave # this option to GitHub staff to decide. # searchable - Boolean flag to enable searching (defaults to true) @@ -477,6 +478,7 @@ Markdown: Matlab: extensions: + - .m - .matlab Max/MSP: @@ -542,6 +544,8 @@ ObjDump: Objective-C: major: true + overrides: + - .m extensions: - .m - .mm @@ -580,6 +584,8 @@ Parrot Internal Representation: Perl: major: true + overrides: + - .pl extensions: - .pl - .ph @@ -593,6 +599,7 @@ Perl: Prolog: major: true extensions: + - .pl - .pro - .prolog @@ -620,6 +627,8 @@ Python traceback: R: major: true lexer: S + overrides: + - .r extensions: - .r - .R @@ -648,9 +657,10 @@ Raw token data: Rebol: lexer: REBOL extensions: - - .rebol + - .r - .r2 - .r3 + - .rebol Redcode: extensions: diff --git a/test/test_language.rb b/test/test_language.rb index 6a83dbba..8fc881ec 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -5,6 +5,17 @@ require 'test/unit' class TestLanguage < Test::Unit::TestCase include Linguist + def test_ambiguous_extensions + assert Language.ambiguous?('.m') + assert_equal Language['Objective-C'], Language.find_by_extension('m') + + assert Language.ambiguous?('.pl') + assert_equal Language['Perl'], Language.find_by_extension('pl') + + assert Language.ambiguous?('.r') + assert_equal Language['R'], Language.find_by_extension('r') + end + def test_lexer # Add an assertion to this list if you add/change any lexers # in languages.yml. Please keep this list alphabetized. 
@@ -383,7 +394,9 @@ class TestLanguage < Test::Unit::TestCase def test_find_all_by_extension Language.all.each do |language| language.extensions.each do |extension| - assert_equal language, Language.find_by_extension(extension) + unless Language.ambiguous?(extension) + assert_equal language, Language.find_by_extension(extension) + end end end end From 32dfa5a2ddb1722e173e04f5f4ab2edd9e80981f Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 19:56:29 -0500 Subject: [PATCH 12/15] .h is ambiguous --- lib/linguist/languages.yml | 4 ++++ test/test_language.rb | 3 +++ 2 files changed, 7 insertions(+) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 7e158b94..1249d135 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -101,6 +101,8 @@ Brainfuck: C: major: true + overrides: + - .h extensions: - .c - .h @@ -124,6 +126,7 @@ C++: - .cpp - .cu - .cxx + - .h - .h++ - .hh - .hpp @@ -547,6 +550,7 @@ Objective-C: overrides: - .m extensions: + - .h - .m - .mm diff --git a/test/test_language.rb b/test/test_language.rb index 8fc881ec..4acb7b65 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -6,6 +6,9 @@ class TestLanguage < Test::Unit::TestCase include Linguist def test_ambiguous_extensions + assert Language.ambiguous?('.h') + assert_equal Language['C'], Language.find_by_extension('h') + assert Language.ambiguous?('.m') assert_equal Language['Objective-C'], Language.find_by_extension('m') From 6611f174e5e1c1d19f251c779bdb7cc57d2b4373 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 20:11:07 -0500 Subject: [PATCH 13/15] Dispatch to ambiguous language guess method --- lib/linguist/blob_helper.rb | 59 +++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 44264327..b1e095a5 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -275,17 +275,8 @@ module 
Linguist def guess_language return if binary? - # If its a header file (.h) try to guess the language - header_language || - - # If it's a .m file, try to guess the language - m_language || - - # If it's a .pl file, try to guess the language - pl_language || - - # If it's a .r file, try to guess the language - r_language || + # Disambiguate between multiple language extensions + disambiguate_extension_language || # See if there is a Language for the extension pathname.language || @@ -304,12 +295,22 @@ module Linguist language ? language.lexer : Lexer['Text only'] end + # Internal: Disambiguates between multiple language extensions. + # + # Delegates to "guess_EXTENSION_language". + # + # Returns a Language or nil. + def disambiguate_extension_language + if Language.ambiguous?(extname) + name = "guess_#{extname.sub(/^\./, '')}_language" + send(name) if respond_to?(name) + end + end + # Internal: Guess language of header files (.h). # # Returns a Language. - def header_language - return unless extname == '.h' - + def guess_h_language if lines.grep(/^@(interface|property|private|public|end)/).any? Language['Objective-C'] elsif lines.grep(/^class |^\s+(public|protected|private):/).any? @@ -329,9 +330,7 @@ module Linguist # * "%" comments # # Returns a Language. - def m_language - return unless extname == '.m' - + def guess_m_language # Objective-C keywords if lines.grep(/^#import|@(interface|implementation|property|synthesize|end)/).any? Language['Objective-C'] @@ -352,15 +351,14 @@ module Linguist # Internal: Guess language of .pl files # + # The rules for disambiguation are: + # + # 1. Many perl files begin with a shebang + # 2. Most Prolog source files have a rule somewhere (marked by the :- operator) + # 3. Default to Perl, because it is more popular + # # Returns a Language. - def pl_language - return unless extname == '.pl' - - # The rules for disambiguation are: - # - # 1. Many perl files begin with a shebang - # 2. 
Most Prolog source files have a rule somewhere (marked by the :- operator) - # 3. Default to Perl, because it is more popular + def guess_pl_language if shebang_script == 'perl' Language['Perl'] elsif lines.grep(/:-/).any? @@ -373,9 +371,7 @@ module Linguist # Internal: Guess language of .r files. # # Returns a Language. - def r_language - return unless extname == '.r' - + def guess_r_language if lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any? Language['Rebol'] else @@ -473,5 +469,12 @@ module Linguist return if !text? || large? lexer.colorize_without_wrapper(data) end + + Language.overridden_extensions.each do |extension| + name = "guess_#{extension.sub(/^\./, '')}_language" + unless method_defined?(name) + warn "Language##{name} was not defined" + end + end end end From 8f46cd07487b6c5ce034ef7a9a3e19f999914513 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 20:48:06 -0500 Subject: [PATCH 14/15] Try to classify language types --- lib/linguist/language.rb | 50 +++++--------- lib/linguist/languages.yml | 138 +++++++++++++++++++++---------------- lib/linguist/repository.rb | 9 +-- test/test_language.rb | 88 +++++------------------ 4 files changed, 112 insertions(+), 173 deletions(-) diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 9eb4df4d..794820b4 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -16,6 +16,9 @@ module Linguist @extension_index = {} @filename_index = {} + # Valid Language types + TYPES = [:markup, :programming] + # Internal: Test if extension maps to multiple Languages. # # Returns true or false. @@ -198,6 +201,12 @@ module Linguist # @name is required @name = attributes[:name] || raise(ArgumentError, "missing name") + # Set type + @type = attributes[:type] ?
attributes[:type].to_sym : nil + if @type && !TYPES.include?(@type) + raise ArgumentError, "invalid type: #{@type}" + end + # Set aliases @aliases = [default_alias_name] + (attributes[:aliases] || []) @@ -213,17 +222,12 @@ module Linguist @overrides = attributes[:overrides] || [] @filenames = attributes[:filenames] || [] - # Set popular, major, and searchable flags + # Set popular and searchable flags @popular = attributes.key?(:popular) ? attributes[:popular] : false - @major = attributes.key?(:major) ? attributes[:major] : false @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true # If group name is set, save the name so we can lazy load it later if attributes[:group_name] - if major? - warn "#{name} is a major language, it should not be grouped with #{attributes[:group_name]}" - end - @group = nil @group_name = attributes[:group_name] @@ -231,7 +235,6 @@ module Linguist else @group = self end - end # Public: Get proper name @@ -245,6 +248,11 @@ module Linguist # Returns the name String attr_reader :name + # Public: Get type. + # + # Returns a type Symbol or nil. + attr_reader :type + # Public: Get aliases # # Examples @@ -303,12 +311,6 @@ module Linguist # Public: Get Language group # - # Minor languages maybe grouped with major languages for - # accounting purposes. For an example, JSP files are grouped as - # Java. - # - # For major languages, group should always return self. - # # Returns a Language def group @group ||= Language.find_by_name(@group_name) @@ -328,26 +330,6 @@ module Linguist !popular? end - # Public: Is it major language? - # - # Major languages should be actual programming - # languages. Configuration formats should be excluded. - # - # Returns true or false - def major? - @major - end - - # Public: Is it a minor language? - # - # Minor language include variants of major languages and - # markup languages like HTML and YAML. - # - # Returns true or false - def minor? - !major? - end - # Public: Is it searchable?
# # Unsearchable languages won't by indexed by solr and won't show @@ -400,6 +382,7 @@ module Linguist YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| Language.create( :name => name, + :type => options['type'], :aliases => options['aliases'], :lexer => options['lexer'], :group_name => options['group'], @@ -408,7 +391,6 @@ module Linguist :extensions => options['extensions'], :overrides => options['overrides'], :filenames => options['filenames'], - :major => options['major'], :popular => popular.include?(name) ) end diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 1249d135..13aad091 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -5,13 +5,12 @@ # lexer exists in lexers.yml. This is a list of available in our # version of pygments. # +# type - Either programming, markup, or nil # lexer - An explicit lexer String (defaults to name.downcase) # aliases - An Array of additional aliases (implicitly # includes name.downcase) # extension - An Array of associated extensions # overrides - An Array of extensions that takes precedence over conflicts -# major - Boolean flag major programming languages. Please leave -# this option to GitHub staff to decide. # searchable - Boolean flag to enable searching (defaults to true) # search_term - Deprecated: Some languages maybe indexed under a # different alias. Avoid defining new exceptions. @@ -22,7 +21,7 @@ # Please keep this list alphabetized. 
ASP: - major: true + type: programming lexer: aspx-vb search_term: aspx-vb aliases: @@ -38,7 +37,7 @@ ASP: - .asp ActionScript: - major: true + type: programming lexer: ActionScript 3 search_term: as3 aliases: @@ -47,7 +46,7 @@ ActionScript: - .as Ada: - major: true + type: programming extensions: - .adb - .ads @@ -58,13 +57,13 @@ AppleScript: - .applescript Arc: - major: true + type: programming lexer: Text only extensions: - .arc Assembly: - major: true + type: programming lexer: NASM search_term: nasm aliases: @@ -73,6 +72,7 @@ Assembly: - .asm Batchfile: + type: programming group: Shell search_term: bat aliases: @@ -90,7 +90,7 @@ BlitzMax: - .bmx Boo: - major: true + type: programming extensions: - .boo @@ -100,7 +100,7 @@ Brainfuck: - .bf C: - major: true + type: programming overrides: - .h extensions: @@ -108,7 +108,7 @@ C: - .h C#: - major: true + type: programming search_term: csharp aliases: - csharp @@ -116,7 +116,7 @@ C#: - .cs C++: - major: true + type: programming search_term: cpp aliases: - cpp @@ -148,19 +148,19 @@ ChucK: - .ck Clojure: - major: true + type: programming extensions: - .clj CoffeeScript: - major: true + type: programming extensions: - .coffee filenames: - Cakefile ColdFusion: - major: true + type: programming lexer: Coldfusion HTML search_term: cfm aliases: @@ -170,7 +170,7 @@ ColdFusion: - .cfc Common Lisp: - major: true + type: programming aliases: - lisp extensions: @@ -190,6 +190,7 @@ Cucumber: - .feature Cython: + type: programming group: Python extensions: - .pyx @@ -197,7 +198,7 @@ Cython: - .pxi D: - major: true + type: programming extensions: - .d - .di @@ -216,7 +217,7 @@ Darcs Patch: - .dpatch Delphi: - major: true + type: programming extensions: - .pas @@ -226,18 +227,18 @@ Diff: - .patch Dylan: - major: true + type: programming extensions: - .dylan Eiffel: - major: true + type: programming lexer: Text only extensions: - .e Emacs Lisp: - major: true + type: programming lexer: Scheme aliases: - elisp @@ -246,13 +247,13 @@ 
Emacs Lisp: - .emacs Erlang: - major: true + type: programming extensions: - .hrl - .erl F#: - major: true + type: programming lexer: OCaml search_term: ocaml extensions: @@ -261,7 +262,7 @@ F#: - .fsx FORTRAN: - major: true + type: programming lexer: Fortran extensions: - .f @@ -270,17 +271,18 @@ FORTRAN: - .F90 Factor: - major: true + type: programming extensions: - .factor Fancy: - major: true + type: programming extensions: - .fy - .fancypack GAS: + type: programming group: Assembly extensions: - .s @@ -312,7 +314,7 @@ Gettext Catalog: - .pot Go: - major: true + type: programming extensions: - .go @@ -328,13 +330,14 @@ Groff: - '.7' Groovy: - major: true + type: programming lexer: Java extensions: - .gradle - .groovy HTML: + type: markup extensions: - .html - .xhtml @@ -342,12 +345,14 @@ HTML: - .xslt HTML+Django: + type: markup group: HTML lexer: HTML+Django/Jinja extensions: - .mustache HTML+ERB: + type: markup group: HTML lexer: RHTML extensions: @@ -355,12 +360,13 @@ HTML+ERB: - .html.erb HTML+PHP: + type: markup group: HTML extensions: - .phtml HaXe: - major: true + type: programming lexer: haXe extensions: - .hx @@ -368,11 +374,12 @@ HaXe: - .mtt Haml: + type: markup extensions: - .haml Haskell: - major: true + type: programming extensions: - .hs - .hsc @@ -394,7 +401,7 @@ IRC log: - .weechatlog Io: - major: true + type: programming extensions: - .io @@ -406,7 +413,7 @@ JSON: - .json Java: - major: true + type: programming extensions: - .java - .pde @@ -421,7 +428,7 @@ Java Server Pages: - .jsp JavaScript: - major: true + type: programming aliases: - js - node @@ -447,6 +454,7 @@ LilyPond: - .ily Literate Haskell: + type: programming group: Haskell search_term: lhs aliases: @@ -455,7 +463,7 @@ Literate Haskell: - .lhs Lua: - major: true + type: programming extensions: - .lua - .nse @@ -471,6 +479,7 @@ Mako: - .mao Markdown: + type: programming lexer: Text only extensions: - .md @@ -485,7 +494,7 @@ Matlab: - .matlab Max/MSP: - major: true + type: 
programming lexer: Text only extensions: - .mxt @@ -494,7 +503,7 @@ MiniD: # Legacy searchable: false Mirah: - major: true + type: programming lexer: Ruby search_term: ruby extensions: @@ -516,7 +525,7 @@ Nimrod: - .nim Nu: - major: true + type: programming lexer: Scheme aliases: - nush @@ -533,7 +542,7 @@ NumPy: - .numpyw OCaml: - major: true + type: programming extensions: - .ml - .mly @@ -546,7 +555,7 @@ ObjDump: - .objdump Objective-C: - major: true + type: programming overrides: - .m extensions: @@ -555,19 +564,20 @@ Objective-C: - .mm Objective-J: - major: true + type: programming extensions: - .j - .sj OpenCL: + type: programming group: C lexer: C extensions: - .cl PHP: - major: true + type: programming extensions: - .php - .aw @@ -587,7 +597,7 @@ Parrot Internal Representation: - .pasm Perl: - major: true + type: programming overrides: - .pl extensions: @@ -601,20 +611,20 @@ Perl: - .psgi Prolog: - major: true + type: programming extensions: - .pl - .pro - .prolog Pure Data: - major: true + type: programming lexer: Text only extensions: - .pd Python: - major: true + type: programming extensions: - .py - .pyw @@ -629,7 +639,7 @@ Python traceback: - .pytb R: - major: true + type: programming lexer: S overrides: - .r @@ -638,12 +648,13 @@ R: - .R RHTML: + type: markup group: HTML extensions: - .rhtml Racket: - major: true + type: programming lexer: Scheme extensions: - .rkt @@ -671,7 +682,7 @@ Redcode: - .cw Ruby: - major: true + type: programming aliases: - jruby - macruby @@ -706,12 +717,12 @@ Sass: - .sass Scala: - major: true + type: programming extensions: - .scala Scheme: - major: true + type: programming extensions: - .sls - .ss @@ -719,13 +730,13 @@ Scheme: - .scm Self: - major: true + type: programming lexer: Text only extensions: - .self Shell: - major: true + type: programming lexer: Bash search_term: bash aliases: @@ -745,7 +756,7 @@ Shell: - .zshrc Smalltalk: - major: true + type: programming extensions: - .st @@ -762,17 +773,18 @@ Standard ML: - 
.sml SuperCollider: - major: true + type: programming lexer: Text only extensions: - .sc Tcl: - major: true + type: programming extensions: - .tcl Tcsh: + type: programming group: Shell extensions: - .tcsh @@ -792,30 +804,31 @@ Text: - .txt Textile: + type: markup lexer: Text only extensions: - .textile VHDL: - major: true + type: programming lexer: Text only extensions: - .vhdl - .vhd Vala: - major: true + type: programming extensions: - .vala Verilog: - major: true + type: programming lexer: Text only extensions: - .v VimL: - major: true + type: programming search_term: vim aliases: - vim @@ -826,7 +839,7 @@ VimL: - .gvimrc Visual Basic: - major: true + type: programming lexer: Text only extensions: - .bas @@ -836,6 +849,7 @@ Visual Basic: - .vb XML: + type: markup extensions: - .xml - .rss @@ -848,7 +862,7 @@ XML: - .rdf XQuery: - major: true + type: programming extensions: - .xq - .xqm @@ -861,6 +875,7 @@ XS: - .xs YAML: + type: markup extensions: - .yml - .yaml @@ -873,12 +888,13 @@ mupad: - .mu ooc: - major: true + type: programming lexer: Ooc extensions: - .ooc reStructuredText: + type: markup search_term: rst aliases: - rst diff --git a/lib/linguist/repository.rb b/lib/linguist/repository.rb index 69f35f1d..3341f492 100644 --- a/lib/linguist/repository.rb +++ b/lib/linguist/repository.rb @@ -70,12 +70,9 @@ module Linguist # Skip vendored or generated blobs next if blob.vendored? || blob.generated? || blob.language.nil? - # Get language group - language = blob.language.group - - # Only include major languages - if language.major? 
- @sizes[language] += blob.size + # Only include programming languages + if blob.language.type == :programming + @sizes[blob.language.group] += blob.size end end diff --git a/test/test_language.rb b/test/test_language.rb index 4acb7b65..250cc0bc 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -213,23 +213,18 @@ class TestLanguage < Test::Unit::TestCase assert_equal Language['reStructuredText'], Language.find_by_alias('rst') end - def test_major_groups - Language.all.each do |language| - if language.major? - assert_equal language, language.group - end - end - end - def test_groups assert_equal Language['Assembly'], Language['GAS'].group assert_equal Language['C'], Language['OpenCL'].group assert_equal Language['Haskell'], Language['Literate Haskell'].group assert_equal Language['Java'], Language['Java Server Pages'].group assert_equal Language['JavaScript'], Language['JSON'].group + assert_equal Language['Perl'], Language['Perl'].group assert_equal Language['Python'], Language['Cython'].group assert_equal Language['Python'], Language['NumPy'].group assert_equal Language['Python'], Language['Python traceback'].group + assert_equal Language['Python'], Language['Python'].group + assert_equal Language['Ruby'], Language['Ruby'].group assert_equal Language['Shell'], Language['Batchfile'].group assert_equal Language['Shell'], Language['Gentoo Ebuild'].group assert_equal Language['Shell'], Language['Gentoo Eclass'].group @@ -285,72 +280,21 @@ class TestLanguage < Test::Unit::TestCase assert Language['Brainfuck'].unpopular? end - def test_major - # Add an assertion to this list if you add/change any major - # settings in languages.yml. Please keep this list alphabetized. - assert Language['ASP'].major? - assert Language['ActionScript'].major? - assert Language['Ada'].major? - assert Language['Arc'].major? - assert Language['Assembly'].major? - assert Language['Boo'].major? - assert Language['C#'].major? - assert Language['C'].major? 
- assert Language['C++'].major? - assert Language['Clojure'].major? - assert Language['CoffeeScript'].major? - assert Language['ColdFusion'].major? - assert Language['Common Lisp'].major? - assert Language['D'].major? - assert Language['Delphi'].major? - assert Language['Dylan'].major? - assert Language['Eiffel'].major? - assert Language['Emacs Lisp'].major? - assert Language['Erlang'].major? - assert Language['F#'].major? - assert Language['FORTRAN'].major? - assert Language['Factor'].major? - assert Language['Go'].major? - assert Language['Groovy'].major? - assert Language['HaXe'].major? - assert Language['Haskell'].major? - assert Language['Io'].major? - assert Language['Java'].major? - assert Language['JavaScript'].major? - assert Language['Lua'].major? - assert Language['Max/MSP'].major? - assert Language['Nu'].major? - assert Language['OCaml'].major? - assert Language['Objective-C'].major? - assert Language['Objective-J'].major? - assert Language['PHP'].major? - assert Language['Perl'].major? - assert Language['Prolog'].major? - assert Language['Pure Data'].major? - assert Language['Python'].major? - assert Language['R'].major? - assert Language['Racket'].major? - assert Language['Ruby'].major? - assert Language['Scala'].major? - assert Language['Scheme'].major? - assert Language['Self'].major? - assert Language['Smalltalk'].major? - assert Language['SuperCollider'].major? - assert Language['Tcl'].major? - assert Language['VHDL'].major? - assert Language['Vala'].major? - assert Language['Verilog'].major? - assert Language['VimL'].major? - assert Language['Visual Basic'].major? - assert Language['XQuery'].major? - assert Language['ooc'].major? + def test_programming + assert_equal :programming, Language['JavaScript'].type + assert_equal :programming, Language['Perl'].type + assert_equal :programming, Language['Python'].type + assert_equal :programming, Language['Ruby'].type end - def test_minor - assert Language['Brainfuck'].minor? 
- assert Language['HTML'].minor? - assert Language['Makefile'].minor? - assert Language['YAML'].minor? + def test_markup + assert_equal :markup, Language['HTML'].type + assert_equal :markup, Language['YAML'].type + end + + def test_other + assert_nil Language['Brainfuck'].type + assert_nil Language['Makefile'].type end def test_searchable From daf9f79e89d1639d6a28ab7c41d7a1f7af870eae Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Tue, 5 Jul 2011 19:17:24 -0700 Subject: [PATCH 15/15] *Mark*up --- lib/linguist/languages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 13aad091..9147289d 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -479,7 +479,7 @@ Mako: - .mao Markdown: - type: programming + type: markup lexer: Text only extensions: - .md