mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Document blob helper
This commit is contained in:
@@ -6,19 +6,50 @@ require 'escape_utils'
|
|||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
|
# BlobHelper is a mixin for Blobish classes that respond to "name",
|
||||||
|
# "data" and "size" such as Grit::Blob.
|
||||||
module BlobHelper
|
module BlobHelper
|
||||||
|
# Internal: Get a Pathname wrapper for Blob#name
|
||||||
|
#
|
||||||
|
# Returns a Pathname.
|
||||||
def pathname
|
def pathname
|
||||||
Pathname.new(name || "")
|
Pathname.new(name || "")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Get the actual blob mime type
|
||||||
|
#
|
||||||
|
# Examples
|
||||||
|
#
|
||||||
|
# # => 'text/plain'
|
||||||
|
# # => 'text/html'
|
||||||
|
#
|
||||||
|
# Returns a mime type String.
|
||||||
def mime_type
|
def mime_type
|
||||||
@mime_type ||= pathname.mime_type
|
@mime_type ||= pathname.mime_type
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Get the Content-Type header value
|
||||||
|
#
|
||||||
|
# This value is used when serving raw blobs.
|
||||||
|
#
|
||||||
|
# Examples
|
||||||
|
#
|
||||||
|
# # => 'text/plain; charset=utf-8'
|
||||||
|
# # => 'application/octet-stream'
|
||||||
|
#
|
||||||
|
# Returns a content type String.
|
||||||
def content_type
|
def content_type
|
||||||
pathname.content_type
|
pathname.content_type
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Get the Content-Disposition header value
|
||||||
|
#
|
||||||
|
# This value is used when serving raw blobs.
|
||||||
|
#
|
||||||
|
# # => "attachment; filename=file.tar"
|
||||||
|
# # => "inline"
|
||||||
|
#
|
||||||
|
# Returns a content disposition String.
|
||||||
def disposition
|
def disposition
|
||||||
case content_type
|
case content_type
|
||||||
when 'application/octet-stream', 'application/java-archive'
|
when 'application/octet-stream', 'application/java-archive'
|
||||||
@@ -28,40 +59,102 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def lines
|
# Public: Is the blob text?
|
||||||
@lines ||= data ? data.split("\n", -1) : []
|
#
|
||||||
end
|
# Return true or false
|
||||||
|
|
||||||
def loc
|
|
||||||
lines.size
|
|
||||||
end
|
|
||||||
|
|
||||||
def sloc
|
|
||||||
lines.grep(/\S/).size
|
|
||||||
end
|
|
||||||
|
|
||||||
def binary?
|
|
||||||
content_type.include?('octet') || !(text? || image?)
|
|
||||||
end
|
|
||||||
|
|
||||||
def text?
|
def text?
|
||||||
content_type[/(text|json)/]
|
content_type[/(text|json)/]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Is the blob a supported image format?
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
def image?
|
def image?
|
||||||
['.png', '.jpg', '.jpeg', '.gif'].include?(pathname.extname)
|
['.png', '.jpg', '.jpeg', '.gif'].include?(pathname.extname)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Is the blob binary?
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
|
def binary?
|
||||||
|
content_type.include?('octet') || !(text? || image?)
|
||||||
|
end
|
||||||
|
|
||||||
MEGABYTE = 1024 * 1024
|
MEGABYTE = 1024 * 1024
|
||||||
|
|
||||||
|
# Public: Is the blob too big to load?
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
def large?
|
def large?
|
||||||
size.to_i > MEGABYTE
|
size.to_i > MEGABYTE
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Is the blob viewable?
|
||||||
|
#
|
||||||
|
# Non-viewable blobs will just show a "View Raw" link
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
def viewable?
|
def viewable?
|
||||||
!image? && !binary? && !large?
|
!image? && !binary? && !large?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__))
|
||||||
|
VendoredRegexp = Regexp.new(vendored_paths.join('|'))
|
||||||
|
|
||||||
|
# Public: Is the blob in a vendored directory?
|
||||||
|
#
|
||||||
|
# Vendored files are ignored by language statistics.
|
||||||
|
#
|
||||||
|
# See "vendor.yml" for a list of vendored conventions that match
|
||||||
|
# this pattern.
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
|
def vendored?
|
||||||
|
name =~ VendoredRegexp
|
||||||
|
end
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# Below here are methods that may access Blob#data. Consider the
|
||||||
|
# performance implications of loading it.
|
||||||
|
################################################################
|
||||||
|
|
||||||
|
# Public: Get each line of data
|
||||||
|
#
|
||||||
|
# Requires Blob#data
|
||||||
|
#
|
||||||
|
# Returns an Array of lines
|
||||||
|
def lines
|
||||||
|
@lines ||= data ? data.split("\n", -1) : []
|
||||||
|
end
|
||||||
|
|
||||||
|
# Public: Get number of lines of code
|
||||||
|
#
|
||||||
|
# Requires Blob#data
|
||||||
|
#
|
||||||
|
# Returns Integer
|
||||||
|
def loc
|
||||||
|
lines.size
|
||||||
|
end
|
||||||
|
|
||||||
|
# Public: Get number of source lines of code
|
||||||
|
#
|
||||||
|
# Requires Blob#data
|
||||||
|
#
|
||||||
|
# Returns Integer
|
||||||
|
def sloc
|
||||||
|
lines.grep(/\S/).size
|
||||||
|
end
|
||||||
|
|
||||||
|
# Public: Is the blob a generated file?
|
||||||
|
#
|
||||||
|
# Generated source code is suppressed in diffs and is ignored by
|
||||||
|
# language statistics.
|
||||||
|
#
|
||||||
|
# Includes:
|
||||||
|
# - Xcode project XML files
|
||||||
|
# - Minified JavaScript
|
||||||
|
#
|
||||||
|
# Return true or false
|
||||||
def generated?
|
def generated?
|
||||||
if ['.xib', '.nib', '.pbxproj'].include?(pathname.extname)
|
if ['.xib', '.nib', '.pbxproj'].include?(pathname.extname)
|
||||||
true
|
true
|
||||||
@@ -73,26 +166,14 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__))
|
# Public: Should the blob be indexed for searching?
|
||||||
VendoredRegexp = Regexp.new(vendored_paths.join('|'))
|
|
||||||
|
|
||||||
def vendored?
|
|
||||||
name =~ VendoredRegexp
|
|
||||||
end
|
|
||||||
|
|
||||||
# Determine if the blob contains bad content that can be used for various
|
|
||||||
# cross site attacks. Right now this is limited to flash files -- the flash
|
|
||||||
# plugin ignores the response content type and treats any URL as flash
|
|
||||||
# when the <object> tag is specified correctly regardless of file extension.
|
|
||||||
#
|
#
|
||||||
# Returns true when the blob data should not be served with any content-type.
|
# Excluded:
|
||||||
def forbidden?
|
# - Non-text files
|
||||||
if data = self.data
|
# - Generated source files
|
||||||
data.size >= 8 && # all flash has at least 8 bytes
|
# - .po and .sql files
|
||||||
%w(CWS FWS).include?(data[0,3]) # file type sigs
|
#
|
||||||
end
|
# Return true or false
|
||||||
end
|
|
||||||
|
|
||||||
def indexable?
|
def indexable?
|
||||||
if !text?
|
if !text?
|
||||||
false
|
false
|
||||||
@@ -107,10 +188,41 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Determine if the blob contains bad content that can be
|
||||||
|
# used for various cross site attacks.
|
||||||
|
#
|
||||||
|
# Right now this is limited to flash files -- the flash plugin
|
||||||
|
# ignores the response content type and treats any URL as flash
|
||||||
|
# when the <object> tag is specified correctly regardless of file
|
||||||
|
# extension.
|
||||||
|
#
|
||||||
|
# Requires Blob#data
|
||||||
|
#
|
||||||
|
# Returns true when the blob data should not be served with any
|
||||||
|
# content-type.
|
||||||
|
def forbidden?
|
||||||
|
if data = self.data
|
||||||
|
data.size >= 8 && # all flash has at least 8 bytes
|
||||||
|
%w(CWS FWS).include?(data[0,3]) # file type sigs
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Public: Detects the Language of the blob.
|
||||||
|
#
|
||||||
|
# May load Blob#data
|
||||||
|
#
|
||||||
|
# Returns a Language object
|
||||||
def language
|
def language
|
||||||
if text?
|
if text?
|
||||||
if !Language.find_by_extension(pathname.extname)
|
# First see if there is a Language for the extension
|
||||||
shebang_language || pathname.language
|
if Language.find_by_extension(pathname.extname)
|
||||||
|
pathname.language
|
||||||
|
|
||||||
|
# Try to detect Language from shebang line
|
||||||
|
elsif language = shebang_language
|
||||||
|
language
|
||||||
|
|
||||||
|
# Default to Pathname#language
|
||||||
else
|
else
|
||||||
pathname.language
|
pathname.language
|
||||||
end
|
end
|
||||||
@@ -119,12 +231,32 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Deprecated: Get the lexer of the blob.
|
||||||
|
#
|
||||||
|
# Returns a Lexer.
|
||||||
def lexer
|
def lexer
|
||||||
language.lexer
|
language.lexer
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Internal: Extract the script name from the shebang line
|
||||||
|
#
|
||||||
|
# Requires Blob#data
|
||||||
|
#
|
||||||
|
# Examples
|
||||||
|
#
|
||||||
|
# '#!/usr/bin/ruby'
|
||||||
|
# # => 'ruby'
|
||||||
|
#
|
||||||
|
# '#!/usr/bin/env ruby'
|
||||||
|
# # => 'ruby'
|
||||||
|
#
|
||||||
|
# '#!/usr/bin/python2.4'
|
||||||
|
# # => 'python'
|
||||||
|
#
|
||||||
|
# Returns a script name String or nil
|
||||||
def shebang_script
|
def shebang_script
|
||||||
return if !text? || large?
|
# Fail fast if blob isn't viewable?
|
||||||
|
return unless viewable?
|
||||||
|
|
||||||
if data && (match = data.match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
if data && (match = data.match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
||||||
bang.sub!(/^#! /, '#!')
|
bang.sub!(/^#! /, '#!')
|
||||||
@@ -147,35 +279,40 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
shebangs = YAML.load_file(File.expand_path("../shebangs.yml", __FILE__))
|
||||||
|
Shebangs = shebangs.inject({}) { |h, (name, scripts)|
|
||||||
|
scripts.each { |script| h[script] = Language[name] }
|
||||||
|
h
|
||||||
|
}
|
||||||
|
|
||||||
|
# Internal: Get Language for shebang script
|
||||||
|
#
|
||||||
|
# Matches script name with shebang script name mappings in "shebangs.yml"
|
||||||
|
#
|
||||||
|
# Returns the Language or nil
|
||||||
def shebang_language
|
def shebang_language
|
||||||
if script = shebang_script
|
if script = shebang_script
|
||||||
case script
|
if lang = Shebangs[script]
|
||||||
when 'bash'
|
lang
|
||||||
Language['Shell']
|
|
||||||
when 'groovy'
|
|
||||||
Language['Java']
|
|
||||||
when 'macruby'
|
|
||||||
Language['Ruby']
|
|
||||||
when 'node'
|
|
||||||
Language['JavaScript']
|
|
||||||
when 'rake'
|
|
||||||
Language['Ruby']
|
|
||||||
when 'sh'
|
|
||||||
Language['Shell']
|
|
||||||
when 'zsh'
|
|
||||||
Language['Shell']
|
|
||||||
else
|
else
|
||||||
lang = Language.find_by_lexer(shebang_script)
|
lang = Language.find_by_lexer(script)
|
||||||
lang != Language['Text'] ? lang : nil
|
lang != Language['Text'] ? lang : nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Highlight syntax of blob
|
||||||
|
#
|
||||||
|
# Returns html String
|
||||||
def colorize
|
def colorize
|
||||||
return if !text? || large?
|
return if !text? || large?
|
||||||
lexer.colorize(data)
|
lexer.colorize(data)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Highlight syntax of blob without the outer highlight div
|
||||||
|
# wrapper.
|
||||||
|
#
|
||||||
|
# Returns html String
|
||||||
def colorize_without_wrapper
|
def colorize_without_wrapper
|
||||||
return if !text? || large?
|
return if !text? || large?
|
||||||
lexer.colorize_without_wrapper(data)
|
lexer.colorize_without_wrapper(data)
|
||||||
|
|||||||
Reference in New Issue
Block a user