mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Merge pull request #1707 from github/vmg/lol-pygments
Remove the Pygments dependency
This commit is contained in:
		| @@ -1,12 +1,12 @@ | ||||
| # Linguist | ||||
|  | ||||
| We use this library at GitHub to detect blob languages, highlight code, ignore binary files, suppress generated files in diffs, and generate language breakdown graphs. | ||||
| We use this library at GitHub to detect blob languages, ignore binary files, suppress generated files in diffs, and generate language breakdown graphs. | ||||
|  | ||||
| ## Features | ||||
|  | ||||
| ### Language detection | ||||
|  | ||||
| Linguist defines a list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be highlighted, a language and a lexer must be defined there. | ||||
| Linguist defines a list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). | ||||
|  | ||||
| Most languages are detected by their file extension. For disambiguating between files with common extensions, we first apply some common-sense heuristics to pick out obvious languages. After that, we use a | ||||
| [statistical | ||||
| @@ -22,10 +22,6 @@ Linguist::FileBlob.new("bin/linguist").language.name #=> "Ruby" | ||||
|  | ||||
| See [lib/linguist/language.rb](https://github.com/github/linguist/blob/master/lib/linguist/language.rb) and [lib/linguist/languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). | ||||
|  | ||||
| ### Syntax Highlighting | ||||
|  | ||||
| The actual syntax highlighting is handled by our Pygments wrapper, [pygments.rb](https://github.com/tmm1/pygments.rb). It also provides a [Lexer abstraction](https://github.com/tmm1/pygments.rb/blob/master/lib/pygments/lexer.rb) that determines which highlighter should be used on a file. | ||||
|  | ||||
| ### Stats | ||||
|  | ||||
| The Language stats bar that you see on every repository is built by aggregating the languages of each file in that repository. The top language in the graph determines the project's primary language. | ||||
|   | ||||
| @@ -16,7 +16,6 @@ Gem::Specification.new do |s| | ||||
|   s.add_dependency 'charlock_holmes', '~> 0.7.3' | ||||
|   s.add_dependency 'escape_utils',    '~> 1.0.1' | ||||
|   s.add_dependency 'mime-types',      '~> 1.19' | ||||
|   s.add_dependency 'pygments.rb',     '~> 0.6.0' | ||||
|   s.add_dependency 'rugged',          '~> 0.21.1b2' | ||||
|  | ||||
|   s.add_development_dependency 'mocha' | ||||
|   | ||||
| @@ -2,7 +2,6 @@ require 'linguist/generated' | ||||
| require 'charlock_holmes' | ||||
| require 'escape_utils' | ||||
| require 'mime/types' | ||||
| require 'pygments' | ||||
| require 'yaml' | ||||
|  | ||||
| module Linguist | ||||
| @@ -193,10 +192,6 @@ module Linguist | ||||
|  | ||||
|     # Public: Is the blob safe to colorize? | ||||
|     # | ||||
|     # We use Pygments for syntax highlighting blobs. Pygments | ||||
|     # can be too slow for very large blobs or for certain | ||||
|     # corner-case blobs. | ||||
|     # | ||||
|     # Return true or false | ||||
|     def safe_to_colorize? | ||||
|       !large? && text? && !high_ratio_of_long_lines? | ||||
| @@ -204,9 +199,6 @@ module Linguist | ||||
|  | ||||
|     # Internal: Does the blob have a ratio of long lines? | ||||
|     # | ||||
|     # These types of files are usually going to make Pygments.rb | ||||
|     # angry if we try to colorize them. | ||||
|     # | ||||
|     # Return true or false | ||||
|     def high_ratio_of_long_lines? | ||||
|       return false if loc == 0 | ||||
| @@ -314,28 +306,9 @@ module Linguist | ||||
|       @language ||= Language.detect(self) | ||||
|     end | ||||
|  | ||||
|     # Internal: Get the lexer of the blob. | ||||
|     # | ||||
|     # Returns a Lexer. | ||||
|     def lexer | ||||
|       language ? language.lexer : Pygments::Lexer.find_by_name('Text only') | ||||
|     end | ||||
|  | ||||
|     # Internal: Get the TextMate compatible scope for the blob | ||||
|     def tm_scope | ||||
|       language && language.tm_scope | ||||
|     end | ||||
|  | ||||
|     # Public: Highlight syntax of blob | ||||
|     # | ||||
|     # options - A Hash of options (defaults to {}) | ||||
|     # | ||||
|     # Returns html String | ||||
|     def colorize(options = {}) | ||||
|       return unless safe_to_colorize? | ||||
|       options[:options] ||= {} | ||||
|       options[:options][:encoding] ||= encoding | ||||
|       lexer.highlight(data, options) | ||||
|     end | ||||
|   end | ||||
| end | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| require 'escape_utils' | ||||
| require 'pygments' | ||||
| require 'yaml' | ||||
| begin | ||||
|   require 'yajl' | ||||
| @@ -302,10 +301,7 @@ module Linguist | ||||
|       # Set aliases | ||||
|       @aliases = [default_alias_name] + (attributes[:aliases] || []) | ||||
|  | ||||
|       # Lookup Lexer object | ||||
|       @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) || | ||||
|         raise(ArgumentError, "#{@name} is missing lexer") | ||||
|  | ||||
|       # Load the TextMate scope name or try to guess one | ||||
|       @tm_scope = attributes[:tm_scope] || begin | ||||
|         context = case @type | ||||
|                   when :data, :markup, :prose | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,3 +1,3 @@ | ||||
| module Linguist | ||||
|   VERSION = "3.5.2" | ||||
|   VERSION = "4.0.0" | ||||
| end | ||||
|   | ||||
| @@ -4,13 +4,10 @@ require 'linguist/samples' | ||||
| require 'test/unit' | ||||
| require 'mocha/setup' | ||||
| require 'mime/types' | ||||
| require 'pygments' | ||||
|  | ||||
| class TestBlob < Test::Unit::TestCase | ||||
|   include Linguist | ||||
|  | ||||
|   Lexer = Pygments::Lexer | ||||
|  | ||||
|   def setup | ||||
|     # git blobs are normally loaded as ASCII-8BIT since they may contain data | ||||
|     # with arbitrary encoding not known ahead of time | ||||
| @@ -469,24 +466,7 @@ class TestBlob < Test::Unit::TestCase | ||||
|     end | ||||
|   end | ||||
|  | ||||
|   def test_lexer | ||||
|     assert_equal Lexer['Ruby'], blob("Ruby/foo.rb").lexer | ||||
|   end | ||||
|  | ||||
|   def test_colorize | ||||
|     assert_equal <<-HTML.chomp, blob("Ruby/foo.rb").colorize | ||||
| <div class="highlight"><pre><span class="k">module</span> <span class="nn">Foo</span> | ||||
| <span class="k">end</span> | ||||
| </pre></div> | ||||
|     HTML | ||||
|   end | ||||
|  | ||||
|   def test_colorize_does_skip_minified_files | ||||
|     assert_nil blob("JavaScript/jquery-1.6.1.min.js").colorize | ||||
|   end | ||||
|  | ||||
|   # Pygments.rb was taking exceeding long on this particular file | ||||
|   def test_colorize_doesnt_blow_up_with_files_with_high_ratio_of_long_lines | ||||
|     assert_nil blob("JavaScript/steelseries-min.js").colorize | ||||
|   def test_minified_files_not_safe_to_highlight | ||||
|     assert !blob("JavaScript/jquery-1.6.1.min.js").safe_to_colorize? | ||||
|   end | ||||
| end | ||||
|   | ||||
| @@ -1,65 +1,9 @@ | ||||
| require 'linguist/language' | ||||
|  | ||||
| require 'test/unit' | ||||
| require 'pygments' | ||||
|  | ||||
| class TestLanguage < Test::Unit::TestCase | ||||
|   include Linguist | ||||
|  | ||||
|   Lexer = Pygments::Lexer | ||||
|  | ||||
|   def test_lexer | ||||
|     assert_equal Lexer['ActionScript 3'], Language['ActionScript'].lexer | ||||
|     assert_equal Lexer['AspectJ'], Language['AspectJ'].lexer | ||||
|     assert_equal Lexer['Bash'], Language['Gentoo Ebuild'].lexer | ||||
|     assert_equal Lexer['Bash'], Language['Gentoo Eclass'].lexer | ||||
|     assert_equal Lexer['Bash'], Language['Shell'].lexer | ||||
|     assert_equal Lexer['C'], Language['OpenCL'].lexer | ||||
|     assert_equal Lexer['C'], Language['XS'].lexer | ||||
|     assert_equal Lexer['C++'], Language['C++'].lexer | ||||
|     assert_equal Lexer['Chapel'], Language['Chapel'].lexer | ||||
|     assert_equal Lexer['Coldfusion HTML'], Language['ColdFusion'].lexer | ||||
|     assert_equal Lexer['Coq'], Language['Coq'].lexer | ||||
|     assert_equal Lexer['FSharp'], Language['F#'].lexer | ||||
|     assert_equal Lexer['FSharp'], Language['F#'].lexer | ||||
|     assert_equal Lexer['Fortran'], Language['FORTRAN'].lexer | ||||
|     assert_equal Lexer['Gherkin'], Language['Cucumber'].lexer | ||||
|     assert_equal Lexer['Groovy'], Language['Groovy'].lexer | ||||
|     assert_equal Lexer['HTML'], Language['HTML'].lexer | ||||
|     assert_equal Lexer['HTML+Django/Jinja'], Language['HTML+Django'].lexer | ||||
|     assert_equal Lexer['HTML+PHP'], Language['HTML+PHP'].lexer | ||||
|     assert_equal Lexer['HTTP'], Language['HTTP'].lexer | ||||
|     assert_equal Lexer['JSON'], Language['JSON'].lexer | ||||
|     assert_equal Lexer['Java'], Language['ChucK'].lexer | ||||
|     assert_equal Lexer['Java'], Language['Java'].lexer | ||||
|     assert_equal Lexer['JavaScript'], Language['JavaScript'].lexer | ||||
|     assert_equal Lexer['LSL'], Language['LSL'].lexer | ||||
|     assert_equal Lexer['MOOCode'], Language['Moocode'].lexer | ||||
|     assert_equal Lexer['MuPAD'], Language['mupad'].lexer | ||||
|     assert_equal Lexer['NASM'], Language['Assembly'].lexer | ||||
|     assert_equal Lexer['OCaml'], Language['OCaml'].lexer | ||||
|     assert_equal Lexer['Ooc'], Language['ooc'].lexer | ||||
|     assert_equal Lexer['OpenEdge ABL'], Language['OpenEdge ABL'].lexer | ||||
|     assert_equal Lexer['REBOL'], Language['Rebol'].lexer | ||||
|     assert_equal Lexer['RHTML'], Language['HTML+ERB'].lexer | ||||
|     assert_equal Lexer['RHTML'], Language['RHTML'].lexer | ||||
|     assert_equal Lexer['Ruby'], Language['Crystal'].lexer | ||||
|     assert_equal Lexer['Ruby'], Language['Mirah'].lexer | ||||
|     assert_equal Lexer['Ruby'], Language['Ruby'].lexer | ||||
|     assert_equal Lexer['S'], Language['R'].lexer | ||||
|     assert_equal Lexer['Common Lisp'], Language['Emacs Lisp'].lexer | ||||
|     assert_equal Lexer['Scheme'], Language['Nu'].lexer | ||||
|     assert_equal Lexer['Racket'], Language['Racket'].lexer | ||||
|     assert_equal Lexer['Scheme'], Language['Scheme'].lexer | ||||
|     assert_equal Lexer['Standard ML'], Language['Standard ML'].lexer | ||||
|     assert_equal Lexer['TeX'], Language['TeX'].lexer | ||||
|     assert_equal Lexer['Verilog'], Language['Verilog'].lexer | ||||
|     assert_equal Lexer['XSLT'], Language['XSLT'].lexer | ||||
|     assert_equal Lexer['aspx-vb'], Language['ASP'].lexer | ||||
|     assert_equal Lexer['haXe'], Language['Haxe'].lexer | ||||
|     assert_equal Lexer['reStructuredText'], Language['reStructuredText'].lexer | ||||
|   end | ||||
|  | ||||
|   def test_find_by_alias | ||||
|     assert_equal Language['ASP'], Language.find_by_alias('asp') | ||||
|     assert_equal Language['ASP'], Language.find_by_alias('aspx') | ||||
| @@ -421,22 +365,4 @@ class TestLanguage < Test::Unit::TestCase | ||||
|   def test_by_type | ||||
|     assert !Language.by_type(:prose).nil? | ||||
|   end | ||||
|  | ||||
|   def test_colorize | ||||
|     assert_equal <<-HTML.chomp, Language['Ruby'].colorize("def foo\n  'foo'\nend\n") | ||||
| <div class="highlight"><pre><span class="k">def</span> <span class="nf">foo</span> | ||||
|   <span class="s1">'foo'</span> | ||||
| <span class="k">end</span> | ||||
| </pre></div> | ||||
|     HTML | ||||
|   end | ||||
|  | ||||
|   def test_colorize_with_options | ||||
|     assert_equal <<-HTML.chomp, Language['Ruby'].colorize("def foo\n  'foo'\nend\n", :options => { :cssclass => "highlight highlight-ruby" }) | ||||
| <div class="highlight highlight-ruby"><pre><span class="k">def</span> <span class="nf">foo</span> | ||||
|   <span class="s1">'foo'</span> | ||||
| <span class="k">end</span> | ||||
| </pre></div> | ||||
|     HTML | ||||
|   end | ||||
| end | ||||
|   | ||||
							
								
								
									
										
											BIN
										
									
								
								vendor/cache/json-1.8.1.gem
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								vendor/cache/json-1.8.1.gem
									
									
									
									
										vendored
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								vendor/cache/posix-spawn-0.3.9.gem
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								vendor/cache/posix-spawn-0.3.9.gem
									
									
									
									
										vendored
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								vendor/cache/pygments.rb-0.6.0.gem
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								vendor/cache/pygments.rb-0.6.0.gem
									
									
									
									
										vendored
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								vendor/cache/yajl-ruby-1.1.0.gem
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								vendor/cache/yajl-ruby-1.1.0.gem
									
									
									
									
										vendored
									
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user