mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			347 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			347 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| require 'linguist/generated'
 | |
| require 'charlock_holmes'
 | |
| require 'escape_utils'
 | |
| require 'mime/types'
 | |
| require 'yaml'
 | |
| 
 | |
| module Linguist
 | |
|   # DEPRECATED Avoid mixing into Blob classes. Prefer functional interfaces
 | |
|   # like `Language.detect` over `Blob#language`. Functions are much easier to
 | |
|   # cache and compose.
 | |
|   #
 | |
|   # Avoid adding additional bloat to this module.
 | |
|   #
 | |
|   # BlobHelper is a mixin for Blobish classes that respond to "name",
 | |
|   # "data" and "size" such as Grit::Blob.
 | |
|   module BlobHelper
 | |
|     # Public: Get the extname of the path
 | |
|     #
 | |
|     # Examples
 | |
|     #
 | |
|     #   blob(name='foo.rb').extname
 | |
|     #   # => '.rb'
 | |
|     #
 | |
|     # Returns a String
 | |
|     def extname
 | |
|       File.extname(name.to_s)
 | |
|     end
 | |
| 
 | |
|     # Internal: Lookup mime type for extension.
 | |
|     #
 | |
|     # Returns a MIME::Type
 | |
|     def _mime_type
 | |
|       if defined? @_mime_type
 | |
|         @_mime_type
 | |
|       else
 | |
|         guesses = ::MIME::Types.type_for(extname.to_s)
 | |
| 
 | |
|         # Prefer text mime types over binary
 | |
|         @_mime_type = guesses.detect { |type| type.ascii? } ||
 | |
|           # Otherwise use the first guess
 | |
|           guesses.first
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Public: Get the actual blob mime type
 | |
|     #
 | |
|     # Examples
 | |
|     #
 | |
|     #   # => 'text/plain'
 | |
|     #   # => 'text/html'
 | |
|     #
 | |
|     # Returns a mime type String.
 | |
|     def mime_type
 | |
|       _mime_type ? _mime_type.to_s : 'text/plain'
 | |
|     end
 | |
| 
 | |
|     # Internal: Is the blob binary according to its mime type
 | |
|     #
 | |
|     # Return true or false
 | |
|     def binary_mime_type?
 | |
|       _mime_type ? _mime_type.binary? : false
 | |
|     end
 | |
| 
 | |
|     # Internal: Is the blob binary according to its mime type,
 | |
|     # overriding it if we have better data from the languages.yml
 | |
|     # database.
 | |
|     #
 | |
|     # Return true or false
 | |
|     def likely_binary?
 | |
|       binary_mime_type? && !Language.find_by_filename(name)
 | |
|     end
 | |
| 
 | |
|     # Public: Get the Content-Type header value
 | |
|     #
 | |
|     # This value is used when serving raw blobs.
 | |
|     #
 | |
|     # Examples
 | |
|     #
 | |
|     #   # => 'text/plain; charset=utf-8'
 | |
|     #   # => 'application/octet-stream'
 | |
|     #
 | |
|     # Returns a content type String.
 | |
|     def content_type
 | |
|       @content_type ||= (binary_mime_type? || binary?) ? mime_type :
 | |
|         (encoding ? "text/plain; charset=#{encoding.downcase}" : "text/plain")
 | |
|     end
 | |
| 
 | |
|     # Public: Get the Content-Disposition header value
 | |
|     #
 | |
|     # This value is used when serving raw blobs.
 | |
|     #
 | |
|     #   # => "attachment; filename=file.tar"
 | |
|     #   # => "inline"
 | |
|     #
 | |
|     # Returns a content disposition String.
 | |
|     def disposition
 | |
|       if text? || image?
 | |
|         'inline'
 | |
|       elsif name.nil?
 | |
|         "attachment"
 | |
|       else
 | |
|         "attachment; filename=#{EscapeUtils.escape_url(name)}"
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     def encoding
 | |
|       if hash = detect_encoding
 | |
|         hash[:encoding]
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     def ruby_encoding
 | |
|       if hash = detect_encoding
 | |
|         hash[:ruby_encoding]
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Try to guess the encoding
 | |
|     #
 | |
|     # Returns: a Hash, with :encoding, :confidence, :type
 | |
|     #          this will return nil if an error occurred during detection or
 | |
|     #          no valid encoding could be found
 | |
|     def detect_encoding
 | |
|       @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob binary?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def binary?
 | |
|       # Large blobs aren't even loaded into memory
 | |
|       if data.nil?
 | |
|         true
 | |
| 
 | |
|       # Treat blank files as text
 | |
|       elsif data == ""
 | |
|         false
 | |
| 
 | |
|       # Charlock doesn't know what to think
 | |
|       elsif encoding.nil?
 | |
|         true
 | |
| 
 | |
|       # If Charlock says its binary
 | |
|       else
 | |
|         detect_encoding[:type] == :binary
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob empty?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def empty?
 | |
|       data.nil? || data == ""
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob text?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def text?
 | |
|       !binary?
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob a supported image format?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def image?
 | |
|       ['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase)
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob a supported 3D model format?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def solid?
 | |
|       extname.downcase == '.stl'
 | |
|     end
 | |
| 
 | |
|     # Public: Is this blob a CSV file?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def csv?
 | |
|       text? && extname.downcase == '.csv'
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob a PDF?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def pdf?
 | |
|       extname.downcase == '.pdf'
 | |
|     end
 | |
| 
 | |
|     MEGABYTE = 1024 * 1024
 | |
| 
 | |
|     # Public: Is the blob too big to load?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def large?
 | |
|       size.to_i > MEGABYTE
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob safe to colorize?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def safe_to_colorize?
 | |
|       !large? && text? && !high_ratio_of_long_lines?
 | |
|     end
 | |
| 
 | |
|     # Internal: Does the blob have a ratio of long lines?
 | |
|     #
 | |
|     # Return true or false
 | |
|     def high_ratio_of_long_lines?
 | |
|       return false if loc == 0
 | |
|       size / loc > 5000
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob viewable?
 | |
|     #
 | |
|     # Non-viewable blobs will just show a "View Raw" link
 | |
|     #
 | |
|     # Return true or false
 | |
|     def viewable?
 | |
|       !large? && text?
 | |
|     end
 | |
| 
 | |
|     vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__))
 | |
|     VendoredRegexp = Regexp.new(vendored_paths.join('|'))
 | |
| 
 | |
|     # Public: Is the blob in a vendored directory?
 | |
|     #
 | |
|     # Vendored files are ignored by language statistics.
 | |
|     #
 | |
|     # See "vendor.yml" for a list of vendored conventions that match
 | |
|     # this pattern.
 | |
|     #
 | |
|     # Return true or false
 | |
|     def vendored?
 | |
|       path =~ VendoredRegexp ? true : false
 | |
|     end
 | |
| 
 | |
|     documentation_paths = YAML.load_file(File.expand_path("../documentation.yml", __FILE__))
 | |
|     DocumentationRegexp = Regexp.new(documentation_paths.join('|'))
 | |
| 
 | |
|     # Public: Is the blob in a documentation directory?
 | |
|     #
 | |
|     # Documentation files are ignored by language statistics.
 | |
|     #
 | |
|     # See "documentation.yml" for a list of documentation conventions that match
 | |
|     # this pattern.
 | |
|     #
 | |
|     # Return true or false
 | |
|     def documentation?
 | |
|       path =~ DocumentationRegexp ? true : false
 | |
|     end
 | |
| 
 | |
|     # Public: Get each line of data
 | |
|     #
 | |
|     # Requires Blob#data
 | |
|     #
 | |
|     # Returns an Array of lines
 | |
|     def lines
 | |
|       @lines ||=
 | |
|         if viewable? && data
 | |
|           # `data` is usually encoded as ASCII-8BIT even when the content has
 | |
|           # been detected as a different encoding. However, we are not allowed
 | |
|           # to change the encoding of `data` because we've made the implicit
 | |
|           # guarantee that each entry in `lines` is encoded the same way as
 | |
|           # `data`.
 | |
|           #
 | |
|           # Instead, we re-encode each possible newline sequence as the
 | |
|           # detected encoding, then force them back to the encoding of `data`
 | |
|           # (usually a binary encoding like ASCII-8BIT). This means that the
 | |
|           # byte sequence will match how newlines are likely encoded in the
 | |
|           # file, but we don't have to change the encoding of `data` as far as
 | |
|           # Ruby is concerned. This allows us to correctly parse out each line
 | |
|           # without changing the encoding of `data`, and
 | |
|           # also--importantly--without having to duplicate many (potentially
 | |
|           # large) strings.
 | |
|           begin
 | |
|             encoded_newlines = ["\r\n", "\r", "\n"].
 | |
|               map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) }
 | |
| 
 | |
|             data.split(Regexp.union(encoded_newlines), -1)
 | |
|           rescue Encoding::ConverterNotFoundError
 | |
|             # The data is not splittable in the detected encoding.  Assume it's
 | |
|             # one big line.
 | |
|             [data]
 | |
|           end
 | |
|         else
 | |
|           []
 | |
|         end
 | |
|     end
 | |
| 
 | |
|     # Public: Get number of lines of code
 | |
|     #
 | |
|     # Requires Blob#data
 | |
|     #
 | |
|     # Returns Integer
 | |
|     def loc
 | |
|       lines.size
 | |
|     end
 | |
| 
 | |
|     # Public: Get number of source lines of code
 | |
|     #
 | |
|     # Requires Blob#data
 | |
|     #
 | |
|     # Returns Integer
 | |
|     def sloc
 | |
|       lines.grep(/\S/).size
 | |
|     end
 | |
| 
 | |
|     # Public: Is the blob a generated file?
 | |
|     #
 | |
|     # Generated source code is suppressed in diffs and is ignored by
 | |
|     # language statistics.
 | |
|     #
 | |
|     # May load Blob#data
 | |
|     #
 | |
|     # Return true or false
 | |
|     def generated?
 | |
|       @_generated ||= Generated.generated?(path, lambda { data })
 | |
|     end
 | |
| 
 | |
|     # Public: Detects the Language of the blob.
 | |
|     #
 | |
|     # May load Blob#data
 | |
|     #
 | |
|     # Returns a Language or nil if none is detected
 | |
|     def language
 | |
|       @language ||= Language.detect(self)
 | |
|     end
 | |
| 
 | |
|     # Internal: Get the TextMate compatible scope for the blob
 | |
|     def tm_scope
 | |
|       language && language.tm_scope
 | |
|     end
 | |
| 
 | |
|     DETECTABLE_TYPES = [:programming, :markup].freeze
 | |
| 
 | |
|     # Internal: Should this blob be included in repository language statistics?
 | |
|     def include_in_language_stats?
 | |
|       !vendored? &&
 | |
|       !documentation? &&
 | |
|       !generated? &&
 | |
|       language && DETECTABLE_TYPES.include?(language.type)
 | |
|     end
 | |
|   end
 | |
| end
 |