diff --git a/README.md b/README.md index 94430f4d..2841eaa4 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # Linguist -We use this library at GitHub to detect blob languages, highlight code, ignore binary files, suppress generated files in diffs, and generate language breakdown graphs. +We use this library at GitHub to detect blob languages, ignore binary files, suppress generated files in diffs, and generate language breakdown graphs. ## Features ### Language detection -Linguist defines a list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be highlighted, a language and a lexer must be defined there. +Linguist defines a list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). Most languages are detected by their file extension. For disambiguating between files with common extensions, we first apply some common-sense heuristics to pick out obvious languages. After that, we use a [statistical @@ -22,10 +22,6 @@ Linguist::FileBlob.new("bin/linguist").language.name #=> "Ruby" See [lib/linguist/language.rb](https://github.com/github/linguist/blob/master/lib/linguist/language.rb) and [lib/linguist/languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). -### Syntax Highlighting - -The actual syntax highlighting is handled by our Pygments wrapper, [pygments.rb](https://github.com/tmm1/pygments.rb). It also provides a [Lexer abstraction](https://github.com/tmm1/pygments.rb/blob/master/lib/pygments/lexer.rb) that determines which highlighter should be used on a file. - ### Stats The Language stats bar that you see on every repository is built by aggregating the languages of each file in that repository. The top language in the graph determines the project's primary language. diff --git a/github-linguist.gemspec b/github-linguist.gemspec index db41c587..a30e0f19 100644 --- a/github-linguist.gemspec +++ b/github-linguist.gemspec @@ -16,7 +16,6 @@ Gem::Specification.new do |s| s.add_dependency 'charlock_holmes', '~> 0.7.3' s.add_dependency 'escape_utils', '~> 1.0.1' s.add_dependency 'mime-types', '~> 1.19' - s.add_dependency 'pygments.rb', '~> 0.6.0' s.add_dependency 'rugged', '~> 0.21.1b2' s.add_development_dependency 'mocha' diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 840ca75c..d6d3dd30 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -2,7 +2,6 @@ require 'linguist/generated' require 'charlock_holmes' require 'escape_utils' require 'mime/types' -require 'pygments' require 'yaml' module Linguist @@ -193,10 +192,6 @@ module Linguist # Public: Is the blob safe to colorize? # - # We use Pygments for syntax highlighting blobs. Pygments - # can be too slow for very large blobs or for certain - # corner-case blobs. - # # Return true or false def safe_to_colorize? !large? && text? && !high_ratio_of_long_lines? @@ -204,9 +199,6 @@ module Linguist # Internal: Does the blob have a ratio of long lines? # - # These types of files are usually going to make Pygments.rb - # angry if we try to colorize them. - # # Return true or false def high_ratio_of_long_lines? return false if loc == 0 @@ -314,28 +306,9 @@ module Linguist @language ||= Language.detect(self) end - # Internal: Get the lexer of the blob. - # - # Returns a Lexer. - def lexer - language ? language.lexer : Pygments::Lexer.find_by_name('Text only') - end - # Internal: Get the TextMate compatible scope for the blob def tm_scope language && language.tm_scope end - - # Public: Highlight syntax of blob - # - # options - A Hash of options (defaults to {}) - # - # Returns html String - def colorize(options = {}) - return unless safe_to_colorize? - options[:options] ||= {} - options[:options][:encoding] ||= encoding - lexer.highlight(data, options) - end end end diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 485d6efb..507ae71a 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -1,5 +1,4 @@ require 'escape_utils' -require 'pygments' require 'yaml' begin require 'yajl' @@ -302,10 +301,7 @@ module Linguist # Set aliases @aliases = [default_alias_name] + (attributes[:aliases] || []) - # Lookup Lexer object - @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) || - raise(ArgumentError, "#{@name} is missing lexer") - + # Load the TextMate scope name or try to guess one @tm_scope = attributes[:tm_scope] || begin context = case @type when :data, :markup, :prose diff --git a/test/test_blob.rb b/test/test_blob.rb index 1d37050d..a6aa0ecf 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -4,7 +4,6 @@ require 'linguist/samples' require 'test/unit' require 'mocha/setup' require 'mime/types' -require 'pygments' class TestBlob < Test::Unit::TestCase include Linguist @@ -473,20 +472,7 @@ class TestBlob < Test::Unit::TestCase assert_equal Lexer['Ruby'], blob("Ruby/foo.rb").lexer end - def test_colorize - assert_equal <<-HTML.chomp, blob("Ruby/foo.rb").colorize -
module Foo
-end
-
- HTML - end - - def test_colorize_does_skip_minified_files - assert_nil blob("JavaScript/jquery-1.6.1.min.js").colorize - end - - # Pygments.rb was taking exceeding long on this particular file - def test_colorize_doesnt_blow_up_with_files_with_high_ratio_of_long_lines - assert_nil blob("JavaScript/steelseries-min.js").colorize + def test_minified_files_not_safe_to_highlight + assert !blob("JavaScript/jquery-1.6.1.min.js").safe_to_colorize? end end diff --git a/test/test_language.rb b/test/test_language.rb index 4adc8efe..cd48bc75 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -1,65 +1,9 @@ require 'linguist/language' - require 'test/unit' -require 'pygments' class TestLanguage < Test::Unit::TestCase include Linguist - Lexer = Pygments::Lexer - - def test_lexer - assert_equal Lexer['ActionScript 3'], Language['ActionScript'].lexer - assert_equal Lexer['AspectJ'], Language['AspectJ'].lexer - assert_equal Lexer['Bash'], Language['Gentoo Ebuild'].lexer - assert_equal Lexer['Bash'], Language['Gentoo Eclass'].lexer - assert_equal Lexer['Bash'], Language['Shell'].lexer - assert_equal Lexer['C'], Language['OpenCL'].lexer - assert_equal Lexer['C'], Language['XS'].lexer - assert_equal Lexer['C++'], Language['C++'].lexer - assert_equal Lexer['Chapel'], Language['Chapel'].lexer - assert_equal Lexer['Coldfusion HTML'], Language['ColdFusion'].lexer - assert_equal Lexer['Coq'], Language['Coq'].lexer - assert_equal Lexer['FSharp'], Language['F#'].lexer - assert_equal Lexer['FSharp'], Language['F#'].lexer - assert_equal Lexer['Fortran'], Language['FORTRAN'].lexer - assert_equal Lexer['Gherkin'], Language['Cucumber'].lexer - assert_equal Lexer['Groovy'], Language['Groovy'].lexer - assert_equal Lexer['HTML'], Language['HTML'].lexer - assert_equal Lexer['HTML+Django/Jinja'], Language['HTML+Django'].lexer - assert_equal Lexer['HTML+PHP'], Language['HTML+PHP'].lexer - assert_equal Lexer['HTTP'], Language['HTTP'].lexer - assert_equal Lexer['JSON'], Language['JSON'].lexer - assert_equal Lexer['Java'], Language['ChucK'].lexer - assert_equal Lexer['Java'], Language['Java'].lexer - assert_equal Lexer['JavaScript'], Language['JavaScript'].lexer - assert_equal Lexer['LSL'], Language['LSL'].lexer - assert_equal Lexer['MOOCode'], Language['Moocode'].lexer - assert_equal Lexer['MuPAD'], Language['mupad'].lexer - assert_equal Lexer['NASM'], Language['Assembly'].lexer - assert_equal Lexer['OCaml'], Language['OCaml'].lexer - assert_equal Lexer['Ooc'], Language['ooc'].lexer - assert_equal Lexer['OpenEdge ABL'], Language['OpenEdge ABL'].lexer - assert_equal Lexer['REBOL'], Language['Rebol'].lexer - assert_equal Lexer['RHTML'], Language['HTML+ERB'].lexer - assert_equal Lexer['RHTML'], Language['RHTML'].lexer - assert_equal Lexer['Ruby'], Language['Crystal'].lexer - assert_equal Lexer['Ruby'], Language['Mirah'].lexer - assert_equal Lexer['Ruby'], Language['Ruby'].lexer - assert_equal Lexer['S'], Language['R'].lexer - assert_equal Lexer['Common Lisp'], Language['Emacs Lisp'].lexer - assert_equal Lexer['Scheme'], Language['Nu'].lexer - assert_equal Lexer['Racket'], Language['Racket'].lexer - assert_equal Lexer['Scheme'], Language['Scheme'].lexer - assert_equal Lexer['Standard ML'], Language['Standard ML'].lexer - assert_equal Lexer['TeX'], Language['TeX'].lexer - assert_equal Lexer['Verilog'], Language['Verilog'].lexer - assert_equal Lexer['XSLT'], Language['XSLT'].lexer - assert_equal Lexer['aspx-vb'], Language['ASP'].lexer - assert_equal Lexer['haXe'], Language['Haxe'].lexer - assert_equal Lexer['reStructuredText'], Language['reStructuredText'].lexer - end - def test_find_by_alias assert_equal Language['ASP'], Language.find_by_alias('asp') assert_equal Language['ASP'], Language.find_by_alias('aspx') @@ -421,22 +365,4 @@ class TestLanguage < Test::Unit::TestCase def test_by_type assert !Language.by_type(:prose).nil? end - - def test_colorize - assert_equal <<-HTML.chomp, Language['Ruby'].colorize("def foo\n 'foo'\nend\n") -
def foo
-  'foo'
-end
-
- HTML - end - - def test_colorize_with_options - assert_equal <<-HTML.chomp, Language['Ruby'].colorize("def foo\n 'foo'\nend\n", :options => { :cssclass => "highlight highlight-ruby" }) -
def foo
-  'foo'
-end
-
- HTML - end end