mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Document blob helper
This commit is contained in:
		| @@ -6,19 +6,50 @@ require 'escape_utils' | ||||
| require 'yaml' | ||||
|  | ||||
| module Linguist | ||||
|   # BlobHelper is a mixin for Blobish classes that respond to "name", | ||||
|   # "data" and "size" such as Grit::Blob. | ||||
|   module BlobHelper | ||||
|     # Internal: Get a Pathname wrapper for Blob#name | ||||
|     # | ||||
|     # Returns a Pathname. | ||||
|     def pathname | ||||
|       Pathname.new(name || "") | ||||
|     end | ||||
|  | ||||
|     # Public: Get the actual blob mime type | ||||
|     # | ||||
|     # Examples | ||||
|     # | ||||
|     #   # => 'text/plain' | ||||
|     #   # => 'text/html' | ||||
|     # | ||||
|     # Returns a mime type String. | ||||
|     def mime_type | ||||
|       @mime_type ||= pathname.mime_type | ||||
|     end | ||||
|  | ||||
|     # Public: Get the Content-Type header value | ||||
|     # | ||||
|     # This value is used when serving raw blobs. | ||||
|     # | ||||
|     # Examples | ||||
|     # | ||||
|     #   # => 'text/plain; charset=utf-8' | ||||
|     #   # => 'application/octet-stream' | ||||
|     # | ||||
|     # Returns a content type String. | ||||
|     def content_type | ||||
|       pathname.content_type | ||||
|     end | ||||
|  | ||||
|     # Public: Get the Content-Disposition header value | ||||
|     # | ||||
|     # This value is used when serving raw blobs. | ||||
|     # | ||||
|     #   # => "attachment; filename=file.tar" | ||||
|     #   # => "inline" | ||||
|     # | ||||
|     # Returns a content disposition String. | ||||
|     def disposition | ||||
|       case content_type | ||||
|       when 'application/octet-stream', 'application/java-archive' | ||||
| @@ -28,40 +59,102 @@ module Linguist | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     def lines | ||||
|       @lines ||= data ? data.split("\n", -1) : [] | ||||
|     end | ||||
|  | ||||
|     def loc | ||||
|       lines.size | ||||
|     end | ||||
|  | ||||
|     def sloc | ||||
|       lines.grep(/\S/).size | ||||
|     end | ||||
|  | ||||
|     def binary? | ||||
|       content_type.include?('octet') || !(text? || image?) | ||||
|     end | ||||
|  | ||||
|     # Public: Is the blob text? | ||||
|     # | ||||
|     # Return true or false | ||||
|     def text? | ||||
|       content_type[/(text|json)/] | ||||
|     end | ||||
|  | ||||
|     # Public: Is the blob a supported image format? | ||||
|     # | ||||
|     # Return true or false | ||||
|     def image? | ||||
|       ['.png', '.jpg', '.jpeg', '.gif'].include?(pathname.extname) | ||||
|     end | ||||
|  | ||||
|     # Public: Is the blob binary? | ||||
|     # | ||||
|     # Return true or false | ||||
|     def binary? | ||||
|       content_type.include?('octet') || !(text? || image?) | ||||
|     end | ||||
|  | ||||
|     MEGABYTE = 1024 * 1024 | ||||
|  | ||||
|     # Public: Is the blob too big to load? | ||||
|     # | ||||
|     # Return true or false | ||||
|     def large? | ||||
|       size.to_i > MEGABYTE | ||||
|     end | ||||
|  | ||||
|     # Public: Is the blob viewable? | ||||
|     # | ||||
|     # Non-viewable blobs will just show a "View Raw" link | ||||
|     # | ||||
|     # Return true or false | ||||
|     def viewable? | ||||
|       !image? && !binary? && !large? | ||||
|     end | ||||
|  | ||||
|     vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__)) | ||||
|     VendoredRegexp = Regexp.new(vendored_paths.join('|')) | ||||
|  | ||||
|     # Public: Is the blob in a vendored directory? | ||||
|     # | ||||
|     # Vendored files are ignored by language statistics. | ||||
|     # | ||||
|     # See "vendor.yml" for a list of vendored conventions that match | ||||
|     # this pattern. | ||||
|     # | ||||
|     # Return true or false | ||||
|     def vendored? | ||||
|       name =~ VendoredRegexp | ||||
|     end | ||||
|  | ||||
|     ################################################################ | ||||
|     # Below here are methods they may access Blob#data. Consider the | ||||
|     # performance implications of loading it. | ||||
|     ################################################################ | ||||
|  | ||||
|     # Public: Get each line of data | ||||
|     # | ||||
|     # Requires Blob#data | ||||
|     # | ||||
|     # Returns an Array of lines | ||||
|     def lines | ||||
|       @lines ||= data ? data.split("\n", -1) : [] | ||||
|     end | ||||
|  | ||||
|     # Public: Get number of lines of code | ||||
|     # | ||||
|     # Requires Blob#data | ||||
|     # | ||||
|     # Returns Integer | ||||
|     def loc | ||||
|       lines.size | ||||
|     end | ||||
|  | ||||
|     # Public: Get number of source lines of code | ||||
|     # | ||||
|     # Requires Blob#data | ||||
|     # | ||||
|     # Returns Integer | ||||
|     def sloc | ||||
|       lines.grep(/\S/).size | ||||
|     end | ||||
|  | ||||
|     # Public: Is the blob a generated file? | ||||
|     # | ||||
|     # Generated source code is supressed in diffs and is ignored by | ||||
|     # langauge statistics. | ||||
|     # | ||||
|     # Includes: | ||||
|     # - XCode project XML files | ||||
|     # - Minified JavaScript | ||||
|     # | ||||
|     # Return true or false | ||||
|     def generated? | ||||
|       if ['.xib', '.nib', '.pbxproj'].include?(pathname.extname) | ||||
|         true | ||||
| @@ -73,26 +166,14 @@ module Linguist | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__)) | ||||
|     VendoredRegexp = Regexp.new(vendored_paths.join('|')) | ||||
|  | ||||
|     def vendored? | ||||
|       name =~ VendoredRegexp | ||||
|     end | ||||
|  | ||||
|     # Determine if the blob contains bad content that can be used for various | ||||
|     # cross site attacks. Right now this is limited to flash files -- the flash | ||||
|     # plugin ignores the response content type and treats any URL as flash | ||||
|     # when the <object> tag is specified correctly regardless of file extension. | ||||
|     # Public: Should the blob be indexed for searching? | ||||
|     # | ||||
|     # Returns true when the blob data should not be served with any content-type. | ||||
|     def forbidden? | ||||
|       if data = self.data | ||||
|         data.size >= 8 &&                     # all flash has at least 8 bytes | ||||
|           %w(CWS FWS).include?(data[0,3])     # file type sigs | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     # Excluded: | ||||
|     # - Non-text files | ||||
|     # - Generated source files | ||||
|     # - .po and .sql files | ||||
|     # | ||||
|     # Return true or false | ||||
|     def indexable? | ||||
|       if !text? | ||||
|         false | ||||
| @@ -107,10 +188,41 @@ module Linguist | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     # Public: Determine if the blob contains bad content that can be | ||||
|     # used for various cross site attacks. | ||||
|     # | ||||
|     # Right now this is limited to flash files -- the flash  plugin | ||||
|     # ignores the response content type and treats any URL as flash | ||||
|     # when the <object> tag is specified correctly regardless of file | ||||
|     # extension. | ||||
|     # | ||||
|     # Requires Blob#data | ||||
|     # | ||||
|     # Returns true when the blob data should not be served with any | ||||
|     # content-type. | ||||
|     def forbidden? | ||||
|       if data = self.data | ||||
|         data.size >= 8 &&                 # all flash has at least 8 bytes | ||||
|           %w(CWS FWS).include?(data[0,3]) # file type sigs | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     # Public: Detects the Language of the blob. | ||||
|     # | ||||
|     # May load Blob#data | ||||
|     # | ||||
|     # Returns a Language object | ||||
|     def language | ||||
|       if text? | ||||
|         if !Language.find_by_extension(pathname.extname) | ||||
|           shebang_language || pathname.language | ||||
|         # First see if there is a Language for the extension | ||||
|         if Language.find_by_extension(pathname.extname) | ||||
|           pathname.language | ||||
|  | ||||
|         # Try to detect Language from shebang line | ||||
|         elsif language = shebang_language | ||||
|           language | ||||
|  | ||||
|         # Default to Pathname#language | ||||
|         else | ||||
|           pathname.language | ||||
|         end | ||||
| @@ -119,12 +231,32 @@ module Linguist | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     # Deprecated: Get the lexer of the blob. | ||||
|     # | ||||
|     # Returns a Lexer. | ||||
|     def lexer | ||||
|       language.lexer | ||||
|     end | ||||
|  | ||||
|     # Internal: Extract the script name from the shebang line | ||||
|     # | ||||
|     # Requires Blob#data | ||||
|     # | ||||
|     # Examples | ||||
|     # | ||||
|     #   '#!/usr/bin/ruby' | ||||
|     #   # => 'ruby' | ||||
|     # | ||||
|     #   '#!/usr/bin/env ruby' | ||||
|     #   # => 'ruby' | ||||
|     # | ||||
|     #   '#!/usr/bash/python2.4' | ||||
|     #   # => 'python' | ||||
|     # | ||||
|     # Returns a script name String or nil | ||||
|     def shebang_script | ||||
|       return if !text? || large? | ||||
|       # Fail fast if blob isn't viewable? | ||||
|       return unless viewable? | ||||
|  | ||||
|       if data && (match = data.match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/ | ||||
|         bang.sub!(/^#! /, '#!') | ||||
| @@ -147,35 +279,40 @@ module Linguist | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     shebangs = YAML.load_file(File.expand_path("../shebangs.yml", __FILE__)) | ||||
|     Shebangs = shebangs.inject({}) { |h, (name, scripts)| | ||||
|       scripts.each { |script| h[script] = Language[name] } | ||||
|       h | ||||
|     } | ||||
|  | ||||
|     # Internal: Get Language for shebang script | ||||
|     # | ||||
|     # Matches script name with shebang script name mappings in "shebangs.yml" | ||||
|     # | ||||
|     # Returns the Language or nil | ||||
|     def shebang_language | ||||
|       if script = shebang_script | ||||
|         case script | ||||
|         when 'bash' | ||||
|           Language['Shell'] | ||||
|         when 'groovy' | ||||
|           Language['Java'] | ||||
|         when 'macruby' | ||||
|           Language['Ruby'] | ||||
|         when 'node' | ||||
|           Language['JavaScript'] | ||||
|         when 'rake' | ||||
|           Language['Ruby'] | ||||
|         when 'sh' | ||||
|           Language['Shell'] | ||||
|         when 'zsh' | ||||
|           Language['Shell'] | ||||
|         if lang = Shebangs[script] | ||||
|           lang | ||||
|         else | ||||
|           lang = Language.find_by_lexer(shebang_script) | ||||
|           lang = Language.find_by_lexer(script) | ||||
|           lang != Language['Text'] ? lang : nil | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|  | ||||
|     # Public: Highlight syntax of blob | ||||
|     # | ||||
|     # Returns html String | ||||
|     def colorize | ||||
|       return if !text? || large? | ||||
|       lexer.colorize(data) | ||||
|     end | ||||
|  | ||||
|     # Public: Highlight syntax of blob without the outer highlight div | ||||
|     # wrapper. | ||||
|     # | ||||
|     # Returns html String | ||||
|     def colorize_without_wrapper | ||||
|       return if !text? || large? | ||||
|       lexer.colorize_without_wrapper(data) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user