mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
MemoryBlob class: wrapper around the content of a file
Makes it possible to detect the language of a snippet of code without having an actual file on disk. This will allow github-markup to use Linguist without restricting its API.
This commit is contained in:
73
lib/linguist/memory_blob.rb
Normal file
73
lib/linguist/memory_blob.rb
Normal file
@@ -0,0 +1,73 @@
|
||||
require 'linguist/blob_helper'
|
||||
|
||||
module Linguist
  # A MemoryBlob is a wrapper around the content of a file to make it quack
  # like a Grit::Blob. It provides the basic interface: `name`,
  # `data`, `path` and `size`.
  #
  # Unlike a blob backed by the file system, the content is handed in by the
  # caller, so `path` only has to look like a path.
  class MemoryBlob
    include BlobHelper

    # Public: Initialize a new MemoryBlob.
    #
    # path    - A path String (does not necessarily exist on the file system).
    # content - Content of the file.
    #
    # Returns a MemoryBlob.
    def initialize(path, content)
      @path = path
      @content = content
    end

    # Public: File path, exactly as it was given to the constructor.
    #
    # Examples
    #
    #   MemoryBlob.new("/path/to/linguist/lib/linguist.rb", "").path
    #   # => "/path/to/linguist/lib/linguist.rb"
    #
    # Returns a String
    attr_reader :path

    # Public: File name (the last component of `path`).
    #
    # Returns a String
    def name
      File.basename(@path)
    end

    # Public: File contents, exactly as given to the constructor.
    #
    # Returns a String.
    def data
      @content
    end

    # Public: Get byte size
    #
    # A nil content is reported as 0 bytes instead of raising.
    #
    # Returns an Integer.
    def size
      @content.to_s.bytesize
    end

    # Public: Get file extension.
    #
    # Returns the last (shortest) entry of `extensions`, or an empty String
    # when the name has no extension.
    def extension
      extensions.last || ""
    end

    # Public: Return an array of the file extensions
    #
    #     >> Linguist::MemoryBlob.new("app/views/things/index.html.erb", "").extensions
    #     => [".html.erb", ".erb"]
    #
    # The name is downcased first; entries go from the longest compound
    # extension to the shortest one.
    #
    # Returns an Array
    def extensions
      # Everything before the first dot is the base name and is not part of
      # any extension.
      _basename, *segments = name.downcase.split(".")

      segments.each_index.map do |index|
        "." + segments[index..-1].join(".")
      end
    end
  end
end
|
||||
@@ -3,14 +3,21 @@ require "minitest/autorun"
|
||||
require "mocha/setup"
|
||||
require "linguist"
|
||||
require 'color-proximity'
|
||||
require "linguist/memory_blob"
|
||||
|
||||
# Absolute path of the `fixtures` directory located next to this file.
#
# Returns a String.
def fixtures_path
  here = File.dirname(File.expand_path(__FILE__))
  File.join(here, "fixtures")
end
|
||||
|
||||
# Build a FileBlob for a fixture file.
#
# name - A path String. A path starting with "/" is used as given; anything
#        else is resolved relative to `fixtures_path`.
#
# Returns a Linguist::FileBlob.
def fixture_blob(name)
  filepath = name.start_with?("/") ? name : File.join(fixtures_path, name)
  Linguist::FileBlob.new(filepath, fixtures_path)
end
|
||||
|
||||
# Build a MemoryBlob from the content of a fixture file.
#
# name - A path String. A path starting with "/" is read as given; anything
#        else is read relative to `fixtures_path`.
#
# The blob is created with `name` as passed in (not the resolved path) and
# the content read from disk.
#
# Returns a Linguist::MemoryBlob.
def fixture_blob_memory(name)
  filepath = name.start_with?("/") ? name : File.join(fixtures_path, name)
  Linguist::MemoryBlob.new(name, File.read(filepath))
end
|
||||
|
||||
def samples_path
|
||||
@@ -18,6 +25,12 @@ def samples_path
|
||||
end
|
||||
|
||||
# Build a FileBlob for a sample file.
#
# name - A path String. A path starting with "/" is used as given; anything
#        else is resolved relative to `samples_path`.
#
# Returns a Linguist::FileBlob.
def sample_blob(name)
  filepath = name.start_with?("/") ? name : File.join(samples_path, name)
  Linguist::FileBlob.new(filepath, samples_path)
end
|
||||
|
||||
# Build a MemoryBlob from the content of a sample file.
#
# name - A path String. A path starting with "/" is read as given; anything
#        else is read relative to `samples_path`.
#
# The blob is created with `name` as passed in (not the resolved path) and
# the content read from disk.
#
# Returns a Linguist::MemoryBlob.
def sample_blob_memory(name)
  filepath = name.start_with?("/") ? name : File.join(samples_path, name)
  Linguist::MemoryBlob.new(name, File.read(filepath))
end
|
||||
|
||||
290
test/test_memory_blob.rb
Normal file
290
test/test_memory_blob.rb
Normal file
@@ -0,0 +1,290 @@
|
||||
require_relative "./helper"
|
||||
|
||||
# Exercises the blob API (name, data, encoding, binary/text detection,
# generated-file detection, language detection, ...) on in-memory blobs built
# by the `sample_blob_memory` and `fixture_blob_memory` helpers.
#
# NOTE(review): the class is called TestBlob; if another test file loaded in
# the same run defines a class with that name, both definitions would be
# merged and same-named tests would override each other — confirm.
class TestBlob < Minitest::Test
  include Linguist

  def setup
    # git blobs are normally loaded as ASCII-8BIT since they may contain data
    # with arbitrary encoding not known ahead of time
    @original_external = Encoding.default_external
    Encoding.default_external = Encoding::ASCII_8BIT
  end

  def teardown
    # Put back whatever default encoding was active before `setup` ran.
    Encoding.default_external = @original_external
  end

  # Builds an in-memory sample blob and sets its @name ivar to 'script'.
  #
  # NOTE(review): not called by any test in this class, and a blob whose
  # `name` is derived from its path would ignore @name — confirm it is
  # still needed.
  def script_blob(name)
    sample_blob_memory(name).tap do |blob|
      blob.instance_variable_set(:@name, 'script')
    end
  end

  def test_name
    blob = sample_blob_memory("Ruby/foo.rb")
    assert_equal "foo.rb", blob.name
  end

  def test_mime_type
    assert_equal "application/postscript", fixture_blob_memory("Binary/octocat.ai").mime_type
    assert_equal "application/x-ruby", sample_blob_memory("Ruby/grit.rb").mime_type
    assert_equal "application/x-sh", sample_blob_memory("Shell/script.sh").mime_type
    assert_equal "text/plain", fixture_blob_memory("Data/README").mime_type
  end

  def test_content_type
    assert_equal "application/pdf", fixture_blob_memory("Binary/foo.pdf").content_type
    assert_equal "image/png", fixture_blob_memory("Binary/foo.png").content_type
    assert_equal "text/plain; charset=iso-8859-2", fixture_blob_memory("Data/README").content_type
  end

  def test_disposition
    # Binary fixtures are served as attachments named after the file
    assert_equal "attachment; filename=foo+bar.jar", fixture_blob_memory("Binary/foo bar.jar").disposition
    assert_equal "attachment; filename=foo.bin", fixture_blob_memory("Binary/foo.bin").disposition
    assert_equal "attachment; filename=linguist.gem", fixture_blob_memory("Binary/linguist.gem").disposition
    assert_equal "attachment; filename=octocat.ai", fixture_blob_memory("Binary/octocat.ai").disposition

    # Everything below is expected to be displayed inline
    assert_equal "inline", fixture_blob_memory("Data/README").disposition
    assert_equal "inline", sample_blob_memory("Text/foo.txt").disposition
    assert_equal "inline", sample_blob_memory("Ruby/grit.rb").disposition
    assert_equal "inline", fixture_blob_memory("Binary/octocat.png").disposition
  end

  def test_data
    blob = sample_blob_memory("Ruby/foo.rb")
    assert_equal "module Foo\nend\n", blob.data
  end

  def test_lines
    assert_equal ["module Foo", "end", ""], sample_blob_memory("Ruby/foo.rb").lines
    assert_equal ["line 1", "line 2", ""], sample_blob_memory("Text/mac.txt").lines
    assert_equal 475, sample_blob_memory("Emacs Lisp/ess-julia.el").lines.length
  end

  def test_lines_maintains_original_encoding
    # Even if the file's encoding is detected as something like UTF-16LE,
    # earlier versions of the gem made implicit guarantees that the encoding of
    # each `line` is in the same encoding as the file was originally read (in
    # practice, UTF-8 or ASCII-8BIT)
    first_line = fixture_blob_memory("Data/utf16le").lines.first
    assert_equal Encoding.default_external, first_line.encoding
  end

  def test_size
    blob = sample_blob_memory("Ruby/foo.rb")
    assert_equal 15, blob.size
  end

  def test_loc
    blob = sample_blob_memory("Ruby/foo.rb")
    assert_equal 3, blob.loc
  end

  def test_sloc
    assert_equal 2, sample_blob_memory("Ruby/foo.rb").sloc
    assert_equal 3, fixture_blob_memory("Data/utf16le-windows").sloc
    assert_equal 1, fixture_blob_memory("Data/iso8859-8-i").sloc
  end

  def test_encoding
    assert_equal "ISO-8859-2", fixture_blob_memory("Data/README").encoding
    assert_equal "ISO-8859-2", fixture_blob_memory("Data/README").ruby_encoding
    assert_equal "UTF-8", sample_blob_memory("Text/foo.txt").encoding
    assert_equal "UTF-8", sample_blob_memory("Text/foo.txt").ruby_encoding
    assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le").encoding
    assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le").ruby_encoding
    assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le-windows").encoding
    assert_equal "UTF-16LE", fixture_blob_memory("Data/utf16le-windows").ruby_encoding
    assert_equal "ISO-2022-KR", sample_blob_memory("Text/ISO-2022-KR.txt").encoding
    assert_equal "binary", sample_blob_memory("Text/ISO-2022-KR.txt").ruby_encoding
    assert_nil fixture_blob_memory("Binary/dog.o").encoding
  end

  def test_binary
    [
      "Binary/git.deb",
      "Binary/hello.pbc",
      "Binary/linguist.gem",
      "Binary/octocat.ai",
      "Binary/octocat.png",
      "Binary/zip"
    ].each do |fixture|
      assert fixture_blob_memory(fixture).binary?
    end

    refute fixture_blob_memory("Data/README").binary?
    refute sample_blob_memory("Ruby/foo.rb").binary?
    refute sample_blob_memory("Perl/script.pl").binary?
  end

  def test_all_binary
    Samples.each do |sample|
      blob = sample_blob_memory(sample[:path])
      refute(blob.likely_binary? || blob.binary?, "#{sample[:path]} is a binary file")
    end
  end

  def test_text
    assert fixture_blob_memory("Data/README").text?
    assert fixture_blob_memory("Data/md").text?
    assert sample_blob_memory("Shell/script.sh").text?
    assert fixture_blob_memory("Data/txt").text?
  end

  def test_image
    assert fixture_blob_memory("Binary/octocat.png").image?
    refute fixture_blob_memory("Binary/octocat.ai").image?
    refute fixture_blob_memory("Binary/octocat.psd").image?
  end

  def test_solid
    ["Binary/cube.stl", "Data/cube.stl"].each do |fixture|
      assert fixture_blob_memory(fixture).solid?
    end
  end

  def test_csv
    blob = fixture_blob_memory("Data/cars.csv")
    assert blob.csv?
  end

  def test_pdf
    blob = fixture_blob_memory("Binary/foo.pdf")
    assert blob.pdf?
  end

  def test_viewable
    assert fixture_blob_memory("Data/README").viewable?
    assert sample_blob_memory("Ruby/foo.rb").viewable?
    assert sample_blob_memory("Perl/script.pl").viewable?

    refute fixture_blob_memory("Binary/linguist.gem").viewable?
    refute fixture_blob_memory("Binary/octocat.ai").viewable?
    refute fixture_blob_memory("Binary/octocat.png").viewable?
  end

  def test_generated
    refute fixture_blob_memory("Data/README").generated?

    # Generated .NET Docfiles
    assert sample_blob_memory("XML/net_docfile.xml").generated?

    # Long line
    refute sample_blob_memory("JavaScript/uglify.js").generated?

    # Inlined JS, but mostly code
    refute sample_blob_memory("JavaScript/json2_backbone.js").generated?

    # Minified JS
    refute sample_blob_memory("JavaScript/jquery-1.6.1.js").generated?
    assert sample_blob_memory("JavaScript/jquery-1.6.1.min.js").generated?
    assert sample_blob_memory("JavaScript/jquery-1.4.2.min.js").generated?

    # Composer generated composer.lock file
    assert sample_blob_memory("JSON/composer.lock").generated?

    # PEG.js-generated parsers
    assert sample_blob_memory("JavaScript/parser.js").generated?

    # Generated PostScript
    refute sample_blob_memory("PostScript/sierpinski.ps").generated?

    # These examples are too basic to tell
    refute sample_blob_memory("JavaScript/hello.js").generated?

    assert sample_blob_memory("JavaScript/intro-old.js").generated?
    assert sample_blob_memory("JavaScript/classes-old.js").generated?

    assert sample_blob_memory("JavaScript/intro.js").generated?
    assert sample_blob_memory("JavaScript/classes.js").generated?

    # Protocol Buffer generated code
    assert sample_blob_memory("C++/protocol-buffer.pb.h").generated?
    assert sample_blob_memory("C++/protocol-buffer.pb.cc").generated?
    assert sample_blob_memory("Java/ProtocolBuffer.java").generated?
    assert sample_blob_memory("Python/protocol_buffer_pb2.py").generated?
    assert sample_blob_memory("Go/api.pb.go").generated?
    assert sample_blob_memory("Go/embedded.go").generated?

    # Apache Thrift generated code
    assert sample_blob_memory("Python/gen-py-linguist-thrift.py").generated?
    assert sample_blob_memory("Go/gen-go-linguist-thrift.go").generated?
    assert sample_blob_memory("Java/gen-java-linguist-thrift.java").generated?
    assert sample_blob_memory("JavaScript/gen-js-linguist-thrift.js").generated?
    assert sample_blob_memory("Ruby/gen-rb-linguist-thrift.rb").generated?
    assert sample_blob_memory("Objective-C/gen-cocoa-linguist-thrift.m").generated?

    # Generated JNI
    assert sample_blob_memory("C/jni_layer.h").generated?

    # Minified CSS
    refute sample_blob_memory("CSS/bootstrap.css").generated?
    assert sample_blob_memory("CSS/bootstrap.min.css").generated?

    # Generated VCR
    assert sample_blob_memory("YAML/vcr_cassette.yml").generated?

    # Generated by Zephir
    refute sample_blob_memory("Zephir/Router.zep").generated?

    # Cython-generated C/C++
    assert sample_blob_memory("C/sgd_fast.c").generated?
    assert sample_blob_memory("C++/wrapper_inner.cpp").generated?

    # Unity3D-generated metadata
    assert sample_blob_memory("Unity3D Asset/Tiles.meta").generated?
  end

  def test_vendored
    refute fixture_blob_memory("Data/README").vendored?
  end

  def test_language
    Samples.each do |sample|
      blob = sample_blob_memory(sample[:path])
      assert blob.language, "No language for #{sample[:path]}"
      assert_equal sample[:language], blob.language.name, blob.name
    end

    # Test language detection for files which shouldn't be used as samples
    root = File.expand_path('../fixtures', __FILE__)
    Dir.entries(root).each do |language|
      skipped = ['.', '..', 'Binary'].include?(language) ||
                File.basename(language) == 'ace_modes.json'
      next if skipped

      # Each directory contains test files of a language
      dirname = File.join(root, language)
      Dir.entries(dirname).each do |filename|
        # By default blob search the file in the samples;
        # thus, we need to give it the absolute path
        filepath = File.join(dirname, filename)
        next unless File.file?(filepath)

        blob = fixture_blob_memory(filepath)
        case language
        when 'Data'
          assert_nil blob.language, "A language was found for #{filepath}"
        when 'Generated'
          assert blob.generated?, "#{filepath} is not a generated file"
        else
          assert blob.language, "No language for #{filepath}"
          assert_equal language, blob.language.name, blob.name
        end
      end
    end
  end

  def test_minified_files_not_safe_to_highlight
    refute sample_blob_memory("JavaScript/jquery-1.6.1.min.js").safe_to_colorize?
  end

  def test_empty
    # Minimal blob: a struct exposing only `data`, with the helper mixed in.
    blob_class = Struct.new(:data) { include Linguist::BlobHelper }

    assert blob_class.new("").empty?
    assert blob_class.new(nil).empty?
    refute blob_class.new(" ").empty?
    refute blob_class.new("nope").empty?
  end

  def test_include_in_language_stats
    generated_blob = sample_blob_memory("CSS/bootstrap.min.css")
    assert_predicate generated_blob, :generated?
    refute_predicate generated_blob, :include_in_language_stats?

    data_blob = sample_blob_memory("Ant Build System/filenames/ant.xml")
    assert_equal :data, data_blob.language.type
    refute_predicate data_blob, :include_in_language_stats?

    prose_blob = sample_blob_memory("Markdown/tender.md")
    assert_equal :prose, prose_blob.language.type
    refute_predicate prose_blob, :include_in_language_stats?

    included_blob = sample_blob_memory("HTML/pages.html")
    assert_predicate included_blob, :include_in_language_stats?
  end
end
|
||||
Reference in New Issue
Block a user