Files
linguist/test/test_blob.rb
Andy Lindeman aa5a94cc3e Handle case where newline chars don't transcode to detected encoding
We've seen cases where binary files are detected as encodings such as
ISO-8859-8-I. This usually happens when the binary files are short, so
while the detector is mistaken, there is also not very much data for use
in the detection algorithm in the first place so it's understandable
that the detector was wrong.

In these cases, the code to convert ASCII newline characters to
encodings such as ISO-8859-8-I fails because there is no conversion
between them.

We now simply assume that the data is all one line in those cases. In
reality the data is binary, but this is obviously difficult to detect
reliably.
2014-06-03 12:26:23 -04:00

419 lines
14 KiB
Ruby

require 'linguist/file_blob'
require 'linguist/samples'
require 'test/unit'
require 'mocha/setup'
require 'mime/types'
require 'pygments'
class TestBlob < Test::Unit::TestCase
include Linguist
Lexer = Pygments::Lexer
# Git blobs are normally loaded as ASCII-8BIT because they may contain data
# in an arbitrary encoding that is not known ahead of time, so the default
# external encoding is switched to match. The previous default is kept in
# @original_external.
def setup
  binary = Encoding.find("ASCII-8BIT")
  @original_external = Encoding.default_external
  Encoding.default_external = binary
end
# Puts back the default external encoding saved in @original_external.
def teardown
  saved = @original_external
  Encoding.default_external = saved
end
# Absolute path of the "samples" directory that sits next to the directory
# containing this file.
def samples_path
  File.expand_path(File.join("..", "samples"), File.dirname(__FILE__))
end
# Builds a FileBlob for a sample file.
#
# name - String path of the file. A path that starts with "/" is used as
#        given; anything else is resolved relative to samples_path.
#
# Returns the FileBlob created from the resolved path, with samples_path
# passed as the second argument.
def blob(name)
  # start_with? only inspects the first character. The previous /^\//
  # pattern also matched a "/" following an embedded newline, which made
  # such relative names look absolute.
  name = File.join(samples_path, name) unless name.start_with?("/")
  FileBlob.new(name, samples_path)
end
# Builds a blob for the given sample and then overwrites its @name instance
# variable with 'script'.
#
# Returns the modified blob.
def script_blob(name)
  blob(name).tap do |scripted|
    scripted.instance_variable_set(:@name, 'script')
  end
end
# A blob built from "foo.rb" reports "foo.rb" as its name.
def test_name
  subject = blob("foo.rb")
  assert_equal "foo.rb", subject.name
end
# Checks the MIME type reported for a handful of sample files.
def test_mime_type
  expected_types = {
    "Binary/octocat.ai" => "application/postscript",
    "Ruby/grit.rb"      => "application/x-ruby",
    "Shell/script.sh"   => "application/x-sh",
    "XML/bar.xml"       => "application/xml",
    "Binary/foo.ogg"    => "audio/ogg",
    "Text/README"       => "text/plain"
  }

  expected_types.each do |path, mime_type|
    assert_equal mime_type, blob(path).mime_type
  end
end
# Checks the content type (including charset for text) of sample files.
def test_content_type
  expected_types = {
    "Binary/foo.pdf" => "application/pdf",
    "Binary/foo.ogg" => "audio/ogg",
    "Binary/foo.png" => "image/png",
    "Text/README"    => "text/plain; charset=iso-8859-2"
  }

  expected_types.each do |path, content_type|
    assert_equal content_type, blob(path).content_type
  end
end
# Checks the disposition value reported for each sample: an attachment with
# a filename for some, "inline" for the others.
def test_disposition
  expected_dispositions = {
    "Binary/foo bar.jar"  => "attachment; filename=foo+bar.jar",
    "Binary/foo.bin"      => "attachment; filename=foo.bin",
    "Binary/linguist.gem" => "attachment; filename=linguist.gem",
    "Binary/octocat.ai"   => "attachment; filename=octocat.ai",
    "Text/README"         => "inline",
    "Text/foo.txt"        => "inline",
    "Ruby/grit.rb"        => "inline",
    "Binary/octocat.png"  => "inline"
  }

  expected_dispositions.each do |path, disposition|
    assert_equal disposition, blob(path).disposition
  end
end
# The blob's data is the raw text of the sample file.
def test_data
  contents = blob("Ruby/foo.rb").data
  assert_equal "module Foo\nend\n", contents
end
# Checks how blob data is split into lines for three samples.
def test_lines
  ruby_lines = blob("Ruby/foo.rb").lines
  assert_equal ["module Foo", "end", ""], ruby_lines

  mac_lines = blob("Text/mac.txt").lines
  assert_equal ["line 1", "line 2", ""], mac_lines

  elisp_lines = blob("Emacs Lisp/ess-julia.el").lines
  assert_equal 475, elisp_lines.length
end
# Even if the file's encoding is detected as something like UTF-16LE,
# earlier versions of the gem implicitly guaranteed that each `line` keeps
# the encoding the file was originally read in (in practice, UTF-8 or
# ASCII-8BIT).
def test_lines_maintains_original_encoding
  first_line = blob("Text/utf16le.txt").lines.first
  assert_equal Encoding.default_external, first_line.encoding
end
# The Ruby sample is expected to report a size of 15.
def test_size
  ruby_sample = blob("Ruby/foo.rb")
  assert_equal 15, ruby_sample.size
end
# The Ruby sample is expected to report a loc of 3.
def test_loc
  ruby_sample = blob("Ruby/foo.rb")
  assert_equal 3, ruby_sample.loc
end
# Checks the sloc value reported for three samples.
def test_sloc
  expected_counts = {
    "Ruby/foo.rb"              => 2,
    "Text/utf16le-windows.txt" => 3,
    "Text/iso8859-8-i.txt"     => 1
  }

  expected_counts.each do |path, count|
    assert_equal count, blob(path).sloc
  end
end
# Checks the encoding name reported for text samples, and that the
# Binary/dog.o sample reports no encoding at all.
def test_encoding
  expected_encodings = {
    "Text/README"              => "ISO-8859-2",
    "Text/dump.sql"            => "ISO-8859-1",
    "Text/foo.txt"             => "UTF-8",
    "Text/utf16le.txt"         => "UTF-16LE",
    "Text/utf16le-windows.txt" => "UTF-16LE"
  }

  expected_encodings.each do |path, encoding|
    assert_equal encoding, blob(path).encoding
  end

  object_file = blob("Binary/dog.o")
  assert_nil object_file.encoding
end
# Checks binary? for a blob whose data is nil, for the binary samples, and
# for text and source samples that must not be reported as binary.
def test_binary
  # Large blobs aren't loaded, so stand in for one by making data return nil.
  large_blob = blob("git.exe")
  def large_blob.data; end
  assert large_blob.binary?

  binary_samples = [
    "Binary/git.deb",
    "Binary/git.exe",
    "Binary/hello.pbc",
    "Binary/linguist.gem",
    "Binary/octocat.ai",
    "Binary/octocat.png",
    "Binary/zip"
  ]
  binary_samples.each { |path| assert blob(path).binary? }

  text_samples = [
    "Text/README",
    "Text/file.txt",
    "Ruby/foo.rb",
    "Perl/script.pl"
  ]
  text_samples.each { |path| assert !blob(path).binary? }
end
# Every sample listed here must be reported as text.
def test_text
  text_samples = [
    "Text/README",
    "Text/dump.sql",
    "Text/file.json",
    "Text/file.txt",
    "Text/md",
    "Shell/script.sh",
    "Text/txt"
  ]

  text_samples.each { |path| assert blob(path).text? }
end
# The gif/jpeg/jpg/png samples must be reported as images; the .ai and .psd
# samples must not.
def test_image
  images = [
    "Binary/octocat.gif",
    "Binary/octocat.jpeg",
    "Binary/octocat.jpg",
    "Binary/octocat.png"
  ]
  images.each { |path| assert blob(path).image? }

  non_images = [
    "Binary/octocat.ai",
    "Binary/octocat.psd"
  ]
  non_images.each { |path| assert !blob(path).image? }
end
# Both cube.stl samples (under Binary/ and Text/) must be reported as solid.
def test_solid
  ["Binary/cube.stl", "Text/cube.stl"].each do |path|
    assert blob(path).solid?
  end
end
# The cars.csv sample must be reported as CSV.
def test_csv
  cars = blob("Text/cars.csv")
  assert cars.csv?
end
# The foo.pdf sample must be reported as a PDF.
def test_pdf
  document = blob("Binary/foo.pdf")
  assert document.pdf?
end
# Text and source samples must be viewable; the binary samples must not.
def test_viewable
  viewable = [
    "Text/README",
    "Ruby/foo.rb",
    "Perl/script.pl"
  ]
  viewable.each { |path| assert blob(path).viewable? }

  not_viewable = [
    "Binary/linguist.gem",
    "Binary/octocat.ai",
    "Binary/octocat.png"
  ]
  not_viewable.each { |path| assert !blob(path).viewable? }
end
# Exercises generated? against sample files. Each entry maps a sample path
# to whether it is expected to be detected as generated; entries are checked
# in the order listed. The last assertion calls Linguist::Generated directly
# with a path and nil data.
def test_generated
  expectations = {
    "Text/README" => false,

    # Xcode project files
    "XML/MainMenu.xib" => true,
    "Binary/MainMenu.nib" => true,
    "XML/project.pbxproj" => true,

    # Gemfile.locks
    "Gemfile.lock" => true,

    # Generated .NET Docfiles
    "XML/net_docfile.xml" => true,

    # Long line
    "JavaScript/uglify.js" => false,

    # Inlined JS, but mostly code
    "JavaScript/json2_backbone.js" => false,

    # Minified JS
    "JavaScript/jquery-1.6.1.js" => false,
    "JavaScript/jquery-1.6.1.min.js" => true,
    "JavaScript/jquery-1.4.2.min.js" => true,

    # CoffeeScript-generated JS
    # TODO

    # TypeScript-generated JS
    # TODO

    # Composer generated composer.lock file
    "JSON/composer.lock" => true,

    # PEG.js-generated parsers
    "JavaScript/parser.js" => true,

    # Generated PostScript
    "PostScript/sierpinski.ps" => false,

    # These examples are too basic to tell
    "JavaScript/empty.js" => false,
    "JavaScript/hello.js" => false,

    # NOTE(review): the next four are expected to be generated, so the
    # "too basic" heading above presumably does not apply to them - confirm.
    "JavaScript/intro-old.js" => true,
    "JavaScript/classes-old.js" => true,
    "JavaScript/intro.js" => true,
    "JavaScript/classes.js" => true,

    # Protocol Buffer generated code
    "C++/protocol-buffer.pb.h" => true,
    "C++/protocol-buffer.pb.cc" => true,
    "Java/ProtocolBuffer.java" => true,
    "Python/protocol_buffer_pb2.py" => true,

    # Generated JNI
    "C/jni_layer.h" => true,

    # Minified CSS
    "CSS/bootstrap.css" => false,
    "CSS/bootstrap.min.css" => true,

    # Generated VCR
    "YAML/vcr_cassette.yml" => true
  }

  expectations.each do |path, generated|
    if generated
      assert blob(path).generated?
    else
      assert !blob(path).generated?
    end
  end

  assert Linguist::Generated.generated?("node_modules/grunt/lib/grunt.js", nil)
end
# Exercises vendored? against a list of paths. Each entry maps a path to
# whether it is expected to be reported as vendored; entries are checked in
# the order listed.
def test_vendored
  expectations = {
    "Text/README" => false,
    "ext/extconf.rb" => false,

    # Dependencies
    "dependencies/windows/headers/GL/glext.h" => true,

    # Node dependencies
    "node_modules/coffee-script/lib/coffee-script.js" => true,

    # Bower Components
    "bower_components/custom/custom.js" => true,
    "app/bower_components/custom/custom.js" => true,
    "vendor/assets/bower_components/custom/custom.js" => true,

    # Rails vendor/
    "vendor/plugins/will_paginate/lib/will_paginate.rb" => true,

    # 'thirdparty' directory
    "thirdparty/lib/main.c" => true,

    # C deps
    "deps/http_parser/http_parser.c" => true,
    "deps/v8/src/v8.h" => true,

    # Debian packaging
    "debian/cron.d" => true,

    # Prototype
    "public/javascripts/application.js" => false,
    "public/javascripts/prototype.js" => true,
    "public/javascripts/effects.js" => true,
    "public/javascripts/controls.js" => true,
    "public/javascripts/dragdrop.js" => true,

    # jQuery
    "jquery.js" => true,
    "public/javascripts/jquery.js" => true,
    "public/javascripts/jquery.min.js" => true,
    "public/javascripts/jquery-1.7.js" => true,
    "public/javascripts/jquery-1.7.min.js" => true,
    "public/javascripts/jquery-1.5.2.js" => true,
    "public/javascripts/jquery-1.6.1.js" => true,
    "public/javascripts/jquery-1.6.1.min.js" => true,
    "public/javascripts/jquery-1.10.1.js" => true,
    "public/javascripts/jquery-1.10.1.min.js" => true,
    "public/javascripts/jquery.github.menu.js" => false,

    # jQuery UI
    "themes/ui-lightness/jquery-ui.css" => true,
    "themes/ui-lightness/jquery-ui-1.8.22.custom.css" => true,
    "themes/ui-lightness/jquery.ui.accordion.css" => true,
    "ui/i18n/jquery.ui.datepicker-ar.js" => true,
    "ui/i18n/jquery-ui-i18n.js" => true,
    "ui/jquery.effects.blind.js" => true,
    "ui/jquery-ui-1.8.22.custom.js" => true,
    "ui/jquery-ui-1.8.22.custom.min.js" => true,
    "ui/jquery-ui-1.8.22.js" => true,
    "ui/jquery-ui-1.8.js" => true,
    "ui/jquery-ui.min.js" => true,
    "ui/jquery.ui.accordion.js" => true,
    "ui/minified/jquery.effects.blind.min.js" => true,
    "ui/minified/jquery.ui.accordion.min.js" => true,

    # MooTools
    "public/javascripts/mootools-core-1.3.2-full-compat.js" => true,
    "public/javascripts/mootools-core-1.3.2-full-compat-yc.js" => true,

    # Dojo
    "public/javascripts/dojo.js" => true,

    # MochiKit
    "public/javascripts/MochiKit.js" => true,

    # YUI
    "public/javascripts/yahoo-dom-event.js" => true,
    "public/javascripts/yahoo-min.js" => true,
    "public/javascripts/yuiloader-dom-event.js" => true,

    # WYS editors
    "public/javascripts/ckeditor.js" => true,
    "public/javascripts/tiny_mce.js" => true,
    "public/javascripts/tiny_mce_popup.js" => true,
    "public/javascripts/tiny_mce_src.js" => true,

    # AngularJS
    "public/javascripts/angular.js" => true,
    "public/javascripts/angular.min.js" => true,

    # D3.js
    "public/javascripts/d3.v3.js" => true,
    "public/javascripts/d3.v3.min.js" => true,

    # Modernizr
    "public/javascripts/modernizr-2.7.1.js" => true,
    "public/javascripts/modernizr.custom.01009.js" => true,

    # Fabric
    "fabfile.py" => true,

    # WAF
    "waf" => true,

    # Visual Studio IntelliSense
    "Scripts/jquery-1.7-vsdoc.js" => true,

    # Microsoft Ajax
    "Scripts/MicrosoftAjax.debug.js" => true,
    "Scripts/MicrosoftAjax.js" => true,
    "Scripts/MicrosoftMvcAjax.debug.js" => true,
    "Scripts/MicrosoftMvcAjax.js" => true,
    "Scripts/MicrosoftMvcValidation.debug.js" => true,
    "Scripts/MicrosoftMvcValidation.js" => true,

    # jQuery validation plugin (MS bundles this with asp.net mvc)
    "Scripts/jquery.validate.js" => true,
    "Scripts/jquery.validate.min.js" => true,
    "Scripts/jquery.validate.unobtrusive.js" => true,
    "Scripts/jquery.validate.unobtrusive.min.js" => true,
    "Scripts/jquery.unobtrusive-ajax.js" => true,
    "Scripts/jquery.unobtrusive-ajax.min.js" => true,

    # NuGet Packages
    "packages/Modernizr.2.0.6/Content/Scripts/modernizr-2.0.6-development-only.js" => true,

    # Test fixtures
    "test/fixtures/random.rkt" => true,
    "Test/fixtures/random.rkt" => true,

    # Cordova/PhoneGap
    "cordova.js" => true,
    "cordova.min.js" => true,
    "cordova-2.1.0.js" => true,
    "cordova-2.1.0.min.js" => true,

    # Vagrant
    "Vagrantfile" => true,

    # Gradle
    "gradlew" => true,
    "gradlew.bat" => true,
    "gradle/wrapper/gradle-wrapper.properties" => true,
    "subproject/gradlew" => true,
    "subproject/gradlew.bat" => true,
    "subproject/gradle/wrapper/gradle-wrapper.properties" => true
  }

  expectations.each do |path, vendored|
    if vendored
      assert blob(path).vendored?
    else
      assert !blob(path).vendored?
    end
  end
end
# For every entry yielded by Samples, the blob at the entry's :path must
# have a language, and that language's name must equal the entry's
# :language.
def test_language
  Samples.each do |sample|
    path = sample[:path]
    sample_blob = blob(path)

    assert sample_blob.language, "No language for #{path}"
    assert_equal sample[:language], sample_blob.language.name, sample_blob.name
  end
end
# The Ruby sample's lexer must be the one looked up as Lexer['Ruby'].
def test_lexer
  ruby_lexer = Lexer['Ruby']
  sample_lexer = blob("Ruby/foo.rb").lexer
  assert_equal ruby_lexer, sample_lexer
end
# Colorizing the Ruby sample must produce exactly this highlighted HTML
# (three lines, no trailing newline).
def test_colorize
  expected_html = [
    '<div class="highlight"><pre><span class="k">module</span> <span class="nn">Foo</span>',
    '<span class="k">end</span>',
    '</pre></div>'
  ].join("\n")

  assert_equal expected_html, blob("Ruby/foo.rb").colorize
end
# Colorizing the minified jQuery sample must return nil.
def test_colorize_does_skip_minified_files
  highlighted = blob("JavaScript/jquery-1.6.1.min.js").colorize
  assert_nil highlighted
end
# Pygments.rb was taking exceedingly long on this particular file, so
# colorizing it must return nil.
def test_colorize_doesnt_blow_up_with_files_with_high_ratio_of_long_lines
  highlighted = blob("JavaScript/steelseries-min.js").colorize
  assert_nil highlighted
end
end