mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	MemoryBlob class: wrapper around the content of a file
Makes it possible to detect the language of a snippet of code without having an actual file on disk. Will allow github-markup to use Linguist without restricting its API.
This commit is contained in:
		
							
								
								
									
										73
									
								
								lib/linguist/memory_blob.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								lib/linguist/memory_blob.rb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | |||||||
|  | require 'linguist/blob_helper' | ||||||
|  |  | ||||||
|  | module Linguist | ||||||
|  |   # A MemoryBlob is a wrapper around the content of a file to make it quack | ||||||
|  |   # like a Grit::Blob. It provides the basic interface: `name`, | ||||||
|  |   # `data`, `path` and `size`. | ||||||
|  |   class MemoryBlob | ||||||
|  |     include BlobHelper | ||||||
|  |  | ||||||
|  |     # Public: Initialize a new MemoryBlob. | ||||||
|  |     # | ||||||
|  |     # path    - A path String (does not necessarily exist on the file system). | ||||||
|  |     # content - Content of the file. | ||||||
|  |     # | ||||||
|  |     # Returns a MemoryBlob. | ||||||
|  |     def initialize(path, content) | ||||||
|  |       @path = path | ||||||
|  |       @content = content | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Public: File path | ||||||
|  |     # | ||||||
|  |     # Examples | ||||||
|  |     # | ||||||
|  |     #   MemoryBlob.new("/path/to/linguist/lib/linguist.rb", "").path | ||||||
|  |     #   # =>  "/path/to/linguist/lib/linguist.rb" | ||||||
|  |     # | ||||||
|  |     # Returns a String | ||||||
|  |     attr_reader :path | ||||||
|  |  | ||||||
|  |     # Public: File name | ||||||
|  |     # | ||||||
|  |     # Returns a String | ||||||
|  |     def name | ||||||
|  |       File.basename(@path) | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Public: File contents. | ||||||
|  |     # | ||||||
|  |     # Returns a String. | ||||||
|  |     def data | ||||||
|  |       @content | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Public: Get byte size | ||||||
|  |     # | ||||||
|  |     # Returns an Integer. | ||||||
|  |     def size | ||||||
|  |       @content.bytesize | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Public: Get file extension. | ||||||
|  |     # | ||||||
|  |     # Returns a String. | ||||||
|  |     def extension | ||||||
|  |       extensions.last || "" | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Public: Return an array of the file extensions | ||||||
|  |     # | ||||||
|  |     #     >> Linguist::MemoryBlob.new("app/views/things/index.html.erb", "").extensions | ||||||
|  |     #     => [".html.erb", ".erb"] | ||||||
|  |     # | ||||||
|  |     # Returns an Array | ||||||
|  |     def extensions | ||||||
|  |       basename, *segments = name.downcase.split(".") | ||||||
|  |  | ||||||
|  |       segments.map.with_index do |segment, index| | ||||||
|  |         "." + segments[index..-1].join(".") | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  | end | ||||||
| @@ -3,14 +3,21 @@ require "minitest/autorun" | |||||||
| require "mocha/setup" | require "mocha/setup" | ||||||
| require "linguist" | require "linguist" | ||||||
| require 'color-proximity' | require 'color-proximity' | ||||||
|  | require "linguist/memory_blob" | ||||||
|  |  | ||||||
| def fixtures_path | def fixtures_path | ||||||
|   File.expand_path("../fixtures", __FILE__) |   File.expand_path("../fixtures", __FILE__) | ||||||
| end | end | ||||||
|  |  | ||||||
| def fixture_blob(name) | def fixture_blob(name) | ||||||
|   name = File.join(fixtures_path, name) unless name =~ /^\// |   filepath = (name =~ /^\//)? name : File.join(fixtures_path, name) | ||||||
|   Linguist::FileBlob.new(name, fixtures_path) |   Linguist::FileBlob.new(filepath, fixtures_path) | ||||||
|  | end | ||||||
|  |  | ||||||
|  | def fixture_blob_memory(name) | ||||||
|  |   filepath = (name =~ /^\//)? name : File.join(fixtures_path, name) | ||||||
|  |   content = File.read(filepath) | ||||||
|  |   Linguist::MemoryBlob.new(name, content) | ||||||
| end | end | ||||||
|  |  | ||||||
| def samples_path | def samples_path | ||||||
| @@ -18,6 +25,12 @@ def samples_path | |||||||
| end | end | ||||||
|  |  | ||||||
| def sample_blob(name) | def sample_blob(name) | ||||||
|   name = File.join(samples_path, name) unless name =~ /^\// |   filepath = (name =~ /^\//)? name : File.join(samples_path, name) | ||||||
|   Linguist::FileBlob.new(name, samples_path) |   Linguist::FileBlob.new(filepath, samples_path) | ||||||
|  | end | ||||||
|  |  | ||||||
|  | def sample_blob_memory(name) | ||||||
|  |   filepath = (name =~ /^\//)? name : File.join(samples_path, name) | ||||||
|  |   content = File.read(filepath) | ||||||
|  |   Linguist::MemoryBlob.new(name, content) | ||||||
| end | end | ||||||
|   | |||||||
							
								
								
									
										290
									
								
								test/test_memory_blob.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										290
									
								
								test/test_memory_blob.rb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,290 @@ | |||||||
|  | require_relative "./helper" | ||||||
|  |  | ||||||
|  | class TestBlob < Minitest::Test | ||||||
|  |   include Linguist | ||||||
|  |  | ||||||
|  |   def setup | ||||||
|  |     # git blobs are normally loaded as ASCII-8BIT since they may contain data | ||||||
|  |     # with arbitrary encoding not known ahead of time | ||||||
|  |     @original_external = Encoding.default_external | ||||||
|  |     Encoding.default_external = Encoding.find("ASCII-8BIT") | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def teardown | ||||||
|  |     Encoding.default_external = @original_external | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def script_blob(name) | ||||||
|  |     blob = sample_blob_memory(name) | ||||||
|  |     blob.instance_variable_set(:@name, 'script') | ||||||
|  |     blob | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_name | ||||||
|  |     assert_equal "foo.rb", sample_blob_memory("Ruby/foo.rb").name | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_mime_type | ||||||
|  |     assert_equal "application/postscript", fixture_blob_memory("Binary/octocat.ai").mime_type | ||||||
|  |     assert_equal "application/x-ruby", sample_blob_memory("Ruby/grit.rb").mime_type | ||||||
|  |     assert_equal "application/x-sh", sample_blob_memory("Shell/script.sh").mime_type | ||||||
|  |     assert_equal "text/plain", fixture_blob_memory("Data/README").mime_type | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_content_type | ||||||
|  |     assert_equal "application/pdf", fixture_blob_memory("Binary/foo.pdf").content_type | ||||||
|  |     assert_equal "image/png", fixture_blob_memory("Binary/foo.png").content_type | ||||||
|  |     assert_equal "text/plain; charset=iso-8859-2", fixture_blob_memory("Data/README").content_type | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_disposition | ||||||
|  |     assert_equal "attachment; filename=foo+bar.jar", fixture_blob_memory("Binary/foo bar.jar").disposition | ||||||
|  |     assert_equal "attachment; filename=foo.bin", fixture_blob_memory("Binary/foo.bin").disposition | ||||||
|  |     assert_equal "attachment; filename=linguist.gem", fixture_blob_memory("Binary/linguist.gem").disposition | ||||||
|  |     assert_equal "attachment; filename=octocat.ai", fixture_blob_memory("Binary/octocat.ai").disposition | ||||||
|  |     assert_equal "inline", fixture_blob_memory("Data/README").disposition | ||||||
|  |     assert_equal "inline", sample_blob_memory("Text/foo.txt").disposition | ||||||
|  |     assert_equal "inline", sample_blob_memory("Ruby/grit.rb").disposition | ||||||
|  |     assert_equal "inline", fixture_blob_memory("Binary/octocat.png").disposition | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_data | ||||||
|  |     assert_equal "module Foo\nend\n", sample_blob_memory("Ruby/foo.rb").data | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_lines | ||||||
|  |     assert_equal ["module Foo", "end", ""], sample_blob_memory("Ruby/foo.rb").lines | ||||||
|  |     assert_equal ["line 1", "line 2", ""], sample_blob_memory("Text/mac.txt").lines | ||||||
|  |     assert_equal 475, sample_blob_memory("Emacs Lisp/ess-julia.el").lines.length | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_lines_maintains_original_encoding | ||||||
|  |     # Even if the file's encoding is detected as something like UTF-16LE, | ||||||
|  |     # earlier versions of the gem made implicit guarantees that the encoding of | ||||||
|  |     # each `line` is in the same encoding as the file was originally read (in | ||||||
|  |     # practice, UTF-8 or ASCII-8BIT) | ||||||
|  |     assert_equal Encoding.default_external, fixture_blob_memory("Data/utf16le").lines.first.encoding | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_size | ||||||
|  |     assert_equal 15, sample_blob_memory("Ruby/foo.rb").size | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_loc | ||||||
|  |     assert_equal 3, sample_blob_memory("Ruby/foo.rb").loc | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_sloc | ||||||
|  |     assert_equal 2, sample_blob_memory("Ruby/foo.rb").sloc | ||||||
|  |     assert_equal 3, fixture_blob_memory("Data/utf16le-windows").sloc | ||||||
|  |     assert_equal 1, fixture_blob_memory("Data/iso8859-8-i").sloc | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_encoding | ||||||
|  |     assert_equal "ISO-8859-2", fixture_blob_memory("Data/README").encoding | ||||||
|  |     assert_equal "ISO-8859-2", fixture_blob_memory("Data/README").ruby_encoding | ||||||
|  |     assert_equal "UTF-8", sample_blob_memory("Text/foo.txt").encoding | ||||||
|  |     assert_equal "UTF-8", sample_blob_memory("Text/foo.txt").ruby_encoding | ||||||
|  |     assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le").encoding | ||||||
|  |     assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le").ruby_encoding | ||||||
|  |     assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le-windows").encoding | ||||||
|  |     assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le-windows").ruby_encoding | ||||||
|  |     assert_equal "ISO-2022-KR", sample_blob_memory("Text/ISO-2022-KR.txt").encoding | ||||||
|  |     assert_equal "binary", sample_blob_memory("Text/ISO-2022-KR.txt").ruby_encoding | ||||||
|  |     assert_nil fixture_blob_memory("Binary/dog.o").encoding | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_binary | ||||||
|  |     assert fixture_blob_memory("Binary/git.deb").binary? | ||||||
|  |     assert fixture_blob_memory("Binary/hello.pbc").binary? | ||||||
|  |     assert fixture_blob_memory("Binary/linguist.gem").binary? | ||||||
|  |     assert fixture_blob_memory("Binary/octocat.ai").binary? | ||||||
|  |     assert fixture_blob_memory("Binary/octocat.png").binary? | ||||||
|  |     assert fixture_blob_memory("Binary/zip").binary? | ||||||
|  |     assert !fixture_blob_memory("Data/README").binary? | ||||||
|  |     assert !sample_blob_memory("Ruby/foo.rb").binary? | ||||||
|  |     assert !sample_blob_memory("Perl/script.pl").binary? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_all_binary | ||||||
|  |     Samples.each do |sample| | ||||||
|  |       blob = sample_blob_memory(sample[:path]) | ||||||
|  |       assert ! (blob.likely_binary? || blob.binary?), "#{sample[:path]} is a binary file" | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_text | ||||||
|  |     assert fixture_blob_memory("Data/README").text? | ||||||
|  |     assert fixture_blob_memory("Data/md").text? | ||||||
|  |     assert sample_blob_memory("Shell/script.sh").text? | ||||||
|  |     assert fixture_blob_memory("Data/txt").text? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_image | ||||||
|  |     assert fixture_blob_memory("Binary/octocat.png").image? | ||||||
|  |     assert !fixture_blob_memory("Binary/octocat.ai").image? | ||||||
|  |     assert !fixture_blob_memory("Binary/octocat.psd").image? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_solid | ||||||
|  |     assert fixture_blob_memory("Binary/cube.stl").solid? | ||||||
|  |     assert fixture_blob_memory("Data/cube.stl").solid? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_csv | ||||||
|  |     assert fixture_blob_memory("Data/cars.csv").csv? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_pdf | ||||||
|  |     assert fixture_blob_memory("Binary/foo.pdf").pdf? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_viewable | ||||||
|  |     assert fixture_blob_memory("Data/README").viewable? | ||||||
|  |     assert sample_blob_memory("Ruby/foo.rb").viewable? | ||||||
|  |     assert sample_blob_memory("Perl/script.pl").viewable? | ||||||
|  |     assert !fixture_blob_memory("Binary/linguist.gem").viewable? | ||||||
|  |     assert !fixture_blob_memory("Binary/octocat.ai").viewable? | ||||||
|  |     assert !fixture_blob_memory("Binary/octocat.png").viewable? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_generated | ||||||
|  |     assert !fixture_blob_memory("Data/README").generated? | ||||||
|  |  | ||||||
|  |     # Generated .NET Docfiles | ||||||
|  |     assert sample_blob_memory("XML/net_docfile.xml").generated? | ||||||
|  |  | ||||||
|  |     # Long line | ||||||
|  |     assert !sample_blob_memory("JavaScript/uglify.js").generated? | ||||||
|  |  | ||||||
|  |     # Inlined JS, but mostly code | ||||||
|  |     assert !sample_blob_memory("JavaScript/json2_backbone.js").generated? | ||||||
|  |  | ||||||
|  |     # Minified JS | ||||||
|  |     assert !sample_blob_memory("JavaScript/jquery-1.6.1.js").generated? | ||||||
|  |     assert sample_blob_memory("JavaScript/jquery-1.6.1.min.js").generated? | ||||||
|  |     assert sample_blob_memory("JavaScript/jquery-1.4.2.min.js").generated? | ||||||
|  |  | ||||||
|  |     # Composer generated composer.lock file | ||||||
|  |     assert sample_blob_memory("JSON/composer.lock").generated? | ||||||
|  |  | ||||||
|  |     # PEG.js-generated parsers | ||||||
|  |     assert sample_blob_memory("JavaScript/parser.js").generated? | ||||||
|  |  | ||||||
|  |     # Generated PostScript | ||||||
|  |     assert !sample_blob_memory("PostScript/sierpinski.ps").generated? | ||||||
|  |  | ||||||
|  |     # These examples are too basic to tell | ||||||
|  |     assert !sample_blob_memory("JavaScript/hello.js").generated? | ||||||
|  |  | ||||||
|  |     assert sample_blob_memory("JavaScript/intro-old.js").generated? | ||||||
|  |     assert sample_blob_memory("JavaScript/classes-old.js").generated? | ||||||
|  |  | ||||||
|  |     assert sample_blob_memory("JavaScript/intro.js").generated? | ||||||
|  |     assert sample_blob_memory("JavaScript/classes.js").generated? | ||||||
|  |  | ||||||
|  |     # Protocol Buffer generated code | ||||||
|  |     assert sample_blob_memory("C++/protocol-buffer.pb.h").generated? | ||||||
|  |     assert sample_blob_memory("C++/protocol-buffer.pb.cc").generated? | ||||||
|  |     assert sample_blob_memory("Java/ProtocolBuffer.java").generated? | ||||||
|  |     assert sample_blob_memory("Python/protocol_buffer_pb2.py").generated? | ||||||
|  |     assert sample_blob_memory("Go/api.pb.go").generated? | ||||||
|  |     assert sample_blob_memory("Go/embedded.go").generated? | ||||||
|  |  | ||||||
|  |     # Apache Thrift generated code | ||||||
|  |     assert sample_blob_memory("Python/gen-py-linguist-thrift.py").generated? | ||||||
|  |     assert sample_blob_memory("Go/gen-go-linguist-thrift.go").generated? | ||||||
|  |     assert sample_blob_memory("Java/gen-java-linguist-thrift.java").generated? | ||||||
|  |     assert sample_blob_memory("JavaScript/gen-js-linguist-thrift.js").generated? | ||||||
|  |     assert sample_blob_memory("Ruby/gen-rb-linguist-thrift.rb").generated? | ||||||
|  |     assert sample_blob_memory("Objective-C/gen-cocoa-linguist-thrift.m").generated? | ||||||
|  |  | ||||||
|  |     # Generated JNI | ||||||
|  |     assert sample_blob_memory("C/jni_layer.h").generated? | ||||||
|  |  | ||||||
|  |     # Minified CSS | ||||||
|  |     assert !sample_blob_memory("CSS/bootstrap.css").generated? | ||||||
|  |     assert sample_blob_memory("CSS/bootstrap.min.css").generated? | ||||||
|  |  | ||||||
|  |     # Generated VCR | ||||||
|  |     assert sample_blob_memory("YAML/vcr_cassette.yml").generated? | ||||||
|  |  | ||||||
|  |     # Generated by Zephir | ||||||
|  |     assert !sample_blob_memory("Zephir/Router.zep").generated? | ||||||
|  |  | ||||||
|  |     # Cython-generated C/C++ | ||||||
|  |     assert sample_blob_memory("C/sgd_fast.c").generated? | ||||||
|  |     assert sample_blob_memory("C++/wrapper_inner.cpp").generated? | ||||||
|  |  | ||||||
|  |     # Unity3D-generated metadata | ||||||
|  |     assert sample_blob_memory("Unity3D Asset/Tiles.meta").generated? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_vendored | ||||||
|  |     assert !fixture_blob_memory("Data/README").vendored? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_language | ||||||
|  |     Samples.each do |sample| | ||||||
|  |       blob = sample_blob_memory(sample[:path]) | ||||||
|  |       assert blob.language, "No language for #{sample[:path]}" | ||||||
|  |       assert_equal sample[:language], blob.language.name, blob.name | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Test language detection for files which shouldn't be used as samples | ||||||
|  |     root = File.expand_path('../fixtures', __FILE__) | ||||||
|  |     Dir.entries(root).each do |language| | ||||||
|  |       next if language == '.' || language == '..' || language == 'Binary' || | ||||||
|  |               File.basename(language) == 'ace_modes.json' | ||||||
|  |  | ||||||
|  |       # Each directory contains test files of a language | ||||||
|  |       dirname = File.join(root, language) | ||||||
|  |       Dir.entries(dirname).each do |filename| | ||||||
|  |         # By default blob search the file in the samples; | ||||||
|  |         # thus, we need to give it the absolute path | ||||||
|  |         filepath = File.join(dirname, filename) | ||||||
|  |         next unless File.file?(filepath) | ||||||
|  |  | ||||||
|  |         blob = fixture_blob_memory(filepath) | ||||||
|  |         if language == 'Data' | ||||||
|  |           assert blob.language.nil?, "A language was found for #{filepath}" | ||||||
|  |         elsif language == 'Generated' | ||||||
|  |           assert blob.generated?, "#{filepath} is not a generated file" | ||||||
|  |         else | ||||||
|  |           assert blob.language, "No language for #{filepath}" | ||||||
|  |           assert_equal language, blob.language.name, blob.name | ||||||
|  |         end | ||||||
|  |       end | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_minified_files_not_safe_to_highlight | ||||||
|  |     assert !sample_blob_memory("JavaScript/jquery-1.6.1.min.js").safe_to_colorize? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_empty | ||||||
|  |     blob = Struct.new(:data) { include Linguist::BlobHelper } | ||||||
|  |  | ||||||
|  |     assert blob.new("").empty? | ||||||
|  |     assert blob.new(nil).empty? | ||||||
|  |     refute blob.new(" ").empty? | ||||||
|  |     refute blob.new("nope").empty? | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   def test_include_in_language_stats | ||||||
|  |     generated = sample_blob_memory("CSS/bootstrap.min.css") | ||||||
|  |     assert_predicate generated, :generated? | ||||||
|  |     refute_predicate generated, :include_in_language_stats? | ||||||
|  |  | ||||||
|  |     data = sample_blob_memory("Ant Build System/filenames/ant.xml") | ||||||
|  |     assert_equal :data, data.language.type | ||||||
|  |     refute_predicate data, :include_in_language_stats? | ||||||
|  |  | ||||||
|  |     prose = sample_blob_memory("Markdown/tender.md") | ||||||
|  |     assert_equal :prose, prose.language.type | ||||||
|  |     refute_predicate prose, :include_in_language_stats? | ||||||
|  |  | ||||||
|  |     included = sample_blob_memory("HTML/pages.html") | ||||||
|  |     assert_predicate included, :include_in_language_stats? | ||||||
|  |   end | ||||||
|  | end | ||||||
		Reference in New Issue
	
	Block a user