mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
362 lines
8.7 KiB
Ruby
362 lines
8.7 KiB
Ruby
require 'linguist/language'
|
|
require 'linguist/mime'
|
|
require 'linguist/pathname'
|
|
|
|
require 'escape_utils'
|
|
require 'yaml'
|
|
|
|
module Linguist
|
|
# BlobHelper is a mixin for Blobish classes that respond to "name",
|
|
# "data" and "size" such as Grit::Blob.
|
|
module BlobHelper
|
|
# Internal: Get a Pathname wrapper for Blob#name
|
|
#
|
|
# Returns a Pathname.
|
|
def pathname
  # Fall back to an empty path string when the blob has no name.
  blob_name = name || ""
  Pathname.new(blob_name)
end
|
|
|
|
# Public: Get the actual blob mime type
|
|
#
|
|
# Examples
|
|
#
|
|
# # => 'text/plain'
|
|
# # => 'text/html'
|
|
#
|
|
# Returns a mime type String.
|
|
def mime_type
  # Reuse the cached value once a lookup has produced one.
  return @mime_type if @mime_type

  @mime_type = pathname.mime_type
end
|
|
|
|
# Public: Get the Content-Type header value
|
|
#
|
|
# This value is used when serving raw blobs.
|
|
#
|
|
# Examples
|
|
#
|
|
# # => 'text/plain; charset=utf-8'
|
|
# # => 'application/octet-stream'
|
|
#
|
|
# Returns a content type String.
|
|
def content_type
  # Pure delegation to the Pathname wrapper built from the blob name.
  blob_path = pathname
  blob_path.content_type
end
|
|
|
|
# Public: Get the Content-Disposition header value
|
|
#
|
|
# This value is used when serving raw blobs.
|
|
#
|
|
# # => "attachment; filename=file.tar"
|
|
# # => "inline"
|
|
#
|
|
# Returns a content disposition String.
|
|
def disposition
  # Content types that are offered as a download instead of rendered inline.
  download_types = ['application/octet-stream', 'application/java-archive']
  return 'inline' unless download_types.include?(content_type)

  # Only the URL-escaped base name is placed in the header value.
  escaped_name = EscapeUtils.escape_url(pathname.basename)
  "attachment; filename=#{escaped_name}"
end
|
|
|
|
# Public: Is the blob text?
|
|
#
|
|
# Return true or false
|
|
def text?
  # A content type mentioning "text" or "json" is treated as text.
  # Coerce the match result so callers receive a real boolean rather than
  # the matched substring (or nil).
  content_type =~ /text|json/ ? true : false
end
|
|
|
|
# Public: Is the blob a supported image format?
|
|
#
|
|
# Return true or false
|
|
def image?
  # Compare the extension case-insensitively so that "PHOTO.JPG" or
  # "logo.PNG" are recognised as images too.
  ['.png', '.jpg', '.jpeg', '.gif'].include?(pathname.extname.downcase)
end
|
|
|
|
# Public: Is the blob binary?
|
|
#
|
|
# Return true or false
|
|
def binary?
  # An octet-stream content type is binary outright.
  return true if content_type.include?('octet')

  # Otherwise the blob is binary only when it is neither text nor an image.
  !text? && !image?
end
|
|
|
|
# Number of bytes in one megabyte (1024 * 1024).
MEGABYTE = 1024**2
|
|
|
|
# Public: Is the blob too big to load?
|
|
#
|
|
# Return true or false
|
|
def large?
  # to_i coerces the reported size to an Integer before the comparison
  # (nil.to_i is 0).
  byte_count = size.to_i
  byte_count > MEGABYTE
end
|
|
|
|
# Public: Is the blob viewable?
|
|
#
|
|
# Non-viewable blobs will just show a "View Raw" link
|
|
#
|
|
# Return true or false
|
|
def viewable?
  # Checks run in order and stop at the first one that rules the blob out.
  return false if image?
  return false if binary?

  !large?
end
|
|
|
|
# The entries of vendor.yml are joined with "|" and compiled into a single
# regular expression used by vendored?.
vendor_yml = File.expand_path("../vendor.yml", __FILE__)
vendored_paths = YAML.load_file(vendor_yml)
VendoredRegexp = Regexp.new(vendored_paths.join('|'))
|
|
|
|
# Public: Is the blob in a vendored directory?
|
|
#
|
|
# Vendored files are ignored by language statistics.
|
|
#
|
|
# See "vendor.yml" for a list of vendored conventions that match
|
|
# this pattern.
|
|
#
|
|
# Return true or false
|
|
def vendored?
  # =~ yields a match offset or nil; convert it so the predicate returns
  # true or false as documented.
  name =~ VendoredRegexp ? true : false
end
|
|
|
|
################################################################
|
|
# Below here are methods that may access Blob#data. Consider the
|
|
# performance implications of loading it.
|
|
################################################################
|
|
|
|
# Public: Get each line of data
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Returns an Array of lines
|
|
def lines
  # Serve the memoized split once it has been computed.
  return @lines if @lines

  # A limit of -1 keeps trailing empty strings, so a final newline yields
  # a last empty line.
  @lines = if data
             data.split("\n", -1)
           else
             []
           end
end
|
|
|
|
# Public: Get number of lines of code
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Returns Integer
|
|
def loc
  # Every entry of #lines counts, blank lines included.
  all_lines = lines
  all_lines.length
end
|
|
|
|
# Public: Get number of source lines of code
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Returns Integer
|
|
def sloc
  # Keep only lines containing at least one non-whitespace character.
  non_blank = lines.grep(/\S/)
  non_blank.length
end
|
|
|
|
# Public: Is the blob a generated file?
|
|
#
|
|
# Generated source code is suppressed in diffs and is ignored by
|
|
# language statistics.
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Includes:
|
|
# - XCode project XML files
|
|
# - Minified JavaScript
|
|
#
|
|
# Return true or false
|
|
def generated?
  extension = pathname.extname

  # Files with these extensions are always considered generated.
  return true if ['.xib', '.nib', '.pbxproj'].include?(extension)
  return true if generated_coffeescript?
  return false unless extension == '.js'

  # JS is minified if any lines are longer than 1000c
  lines.any? { |line| line.length > 1000 }
end
|
|
|
|
# Internal: Is the blob JS generated by CoffeeScript?
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# CoffeeScript is meant to output JS that would be difficult to
|
|
# tell if it was generated or not. Look for a number of patterns
|
|
# output by the CS compiler.
|
|
#
|
|
# Return true or false
|
|
def generated_coffeescript?
  # Explicit false (not a bare return) keeps the true/false contract for
  # blobs that are not JavaScript.
  return false unless pathname.extname == '.js'

  # Compiled output is wrapped in a module closure: the first line opens it,
  # the second to last line closes it, and the last line is blank.
  wrapped = lines[0] == '(function() {' &&
            lines[-2] == '}).call(this);' &&
            lines[-1] == ''
  return false unless wrapped

  score = 0

  lines.each do |line|
    # Only variable declarations contribute to the score.
    next unless line =~ /var /

    # Underscored temp vars are likely to be Coffee
    score += line.scan(/_fn|_i|_len|_ref|_results/).size

    # bind and extend functions are very Coffee specific
    score += 3 * line.scan(/__bind|__extends|__hasProp|__indexOf|__slice/).size
  end

  # Require a score of 3. This is fairly arbitrary. Consider
  # tweaking later.
  score >= 3
end
|
|
|
|
# Public: Should the blob be indexed for searching?
|
|
#
|
|
# Excluded:
|
|
# - Non-text files
|
|
# - Generated source files
|
|
# - .po and .sql files
|
|
#
|
|
# Return true or false
|
|
def indexable?
  return false unless text?
  return false if generated?

  extension = pathname.extname
  return false if ['.po', '.sql'].include?(extension)

  # Only blobs whose extension maps to a known Language are indexed.
  Language.find_by_extension(extension) ? true : false
end
|
|
|
|
# Public: Determine if the blob contains bad content that can be
|
|
# used for various cross site attacks.
|
|
#
|
|
# Right now this is limited to flash files -- the flash plugin
|
|
# ignores the response content type and treats any URL as flash
|
|
# when the <object> tag is specified correctly regardless of file
|
|
# extension.
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Returns true when the blob data should not be served with any
|
|
# content-type.
|
|
def forbidden?
  blob_data = self.data
  return unless blob_data

  # all flash has at least 8 bytes
  return false if blob_data.size < 8

  # file type sigs
  %w(CWS FWS).include?(blob_data[0, 3])
end
|
|
|
|
# Public: Detects the Language of the blob.
|
|
#
|
|
# May load Blob#data
|
|
#
|
|
# Returns a Language object
|
|
def language
  # Non-text blobs are always reported as the Text language.
  return Language['Text'] unless text?

  # First see if there is a Language for the extension
  return pathname.language if Language.find_by_extension(pathname.extname)

  # Try the shebang line next, then default to Pathname#language
  shebang_language || pathname.language
end
|
|
|
|
# Deprecated: Get the lexer of the blob.
|
|
#
|
|
# Returns a Lexer.
|
|
def lexer
  # Forwards to the lexer of whatever #language detects.
  detected = language
  detected.lexer
end
|
|
|
|
# Internal: Extract the script name from the shebang line
|
|
#
|
|
# Requires Blob#data
|
|
#
|
|
# Examples
|
|
#
|
|
# '#!/usr/bin/ruby'
|
|
# # => 'ruby'
|
|
#
|
|
# '#!/usr/bin/env ruby'
|
|
# # => 'ruby'
|
|
#
|
|
# '#!/usr/bash/python2.4'
|
|
# # => 'python'
|
|
#
|
|
# Returns a script name String or nil
|
|
def shebang_script
  # Fail fast if blob isn't viewable?
  return unless viewable?

  # A shebang only counts when it opens the blob, so look at the very first
  # line rather than the first non-empty one.
  content = data
  first_line = content && content[/\A[^\n]*/]
  return unless first_line && first_line =~ /\A#!/

  # "#! /bin/sh" => "#!/bin/sh"
  bang = first_line.sub(/\A#!\s+/, '#!')
  tokens = bang.split(' ')
  pieces = tokens.first.split('/')

  # "#!/usr/bin/ruby" => "ruby", "#!ruby" => "ruby"
  script = pieces.size > 1 ? pieces.last : pieces.first.sub('#!', '')

  # "#!/usr/bin/env ruby" => "ruby"; a bare "env" names no script at all
  script = tokens[1] if script == 'env'
  return unless script

  # python2.4 => python
  script.sub(/(?:\d+\.?)+/, '')
end
|
|
|
|
# Invert shebangs.yml (language name => script names) into a
# script name => Language lookup table.
shebangs = YAML.load_file(File.expand_path("../shebangs.yml", __FILE__))
script_languages = {}
shebangs.each do |language_name, scripts|
  scripts.each { |script| script_languages[script] = Language[language_name] }
end
Shebangs = script_languages
|
|
|
|
# Internal: Get Language for shebang script
|
|
#
|
|
# Matches script name with shebang script name mappings in "shebangs.yml"
|
|
#
|
|
# Returns the Language or nil
|
|
def shebang_language
  script = shebang_script
  return unless script

  # An explicit mapping in Shebangs takes precedence.
  mapped = Shebangs[script]
  return mapped if mapped

  # Otherwise look the script name up as a lexer; a Text result is
  # reported as nil.
  by_lexer = Language.find_by_lexer(script)
  by_lexer if by_lexer != Language['Text']
end
|
|
|
|
# Public: Highlight syntax of blob
|
|
#
|
|
# Returns html String
|
|
def colorize
  # Only text blobs that are not too large get highlighted; otherwise the
  # result is nil.
  if text? && !large?
    lexer.colorize(data)
  end
end
|
|
|
|
# Public: Highlight syntax of blob without the outer highlight div
|
|
# wrapper.
|
|
#
|
|
# Returns html String
|
|
def colorize_without_wrapper
  # Only text blobs that are not too large get highlighted; otherwise the
  # result is nil.
  if text? && !large?
    lexer.colorize_without_wrapper(data)
  end
end
|
|
end
|
|
end
|