Merge remote-tracking branch 'origin/master' into strategies

* origin/master: (165 commits)
  Add F# and GLSL samples.  Add Forth and GLSL extension .fs. Add heuristic to disambiguate between F#, Forth, and GLSL.
  byebug requires ruby 2.0
  Remove test for removed extension
  Fix typo in test
  add rake interpreter
  add python3 interpreter
  Remove old wrong_shebang.rb sample
  Add byebug
  Link to Lightshow in CONTRIBUTING.md
  Switch to a better F# grammar
  Bump Rugged again
  Checkout the master for testing
  Rugged 0.22.0b3
  Reordering
  Bump version to 4.0.3
  Add some docs for tm_scope
  Change NONE to none
  Checking other case for Chart.jS
  Test that all languages have grammars
  Fix RHTML's tm_scope
  ...

Conflicts:
	lib/linguist/language.rb
This commit is contained in:
Brandon Keepers
2014-11-27 10:52:44 -05:00
129 changed files with 7316 additions and 622 deletions

View File

@@ -2,7 +2,6 @@ require 'linguist/generated'
require 'charlock_holmes'
require 'escape_utils'
require 'mime/types'
require 'pygments'
require 'yaml'
module Linguist
@@ -147,6 +146,13 @@ module Linguist
end
end
# Public: Is the blob empty?
#
# Return true or false
def empty?
data.nil? || data == ""
end
# Public: Is the blob text?
#
# Return true or false
@@ -193,10 +199,6 @@ module Linguist
# Public: Is the blob safe to colorize?
#
# We use Pygments for syntax highlighting blobs. Pygments
# can be too slow for very large blobs or for certain
# corner-case blobs.
#
# Return true or false
def safe_to_colorize?
!large? && text? && !high_ratio_of_long_lines?
@@ -204,9 +206,6 @@ module Linguist
# Internal: Does the blob have a ratio of long lines?
#
# These types of files are usually going to make Pygments.rb
# angry if we try to colorize them.
#
# Return true or false
def high_ratio_of_long_lines?
return false if loc == 0
@@ -314,28 +313,9 @@ module Linguist
@language ||= Language.detect(self)
end
# Internal: Get the lexer of the blob.
#
# Returns a Lexer.
def lexer
language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end
# Internal: Get the TextMate compatible scope for the blob
def tm_scope
language && language.tm_scope
end
# Public: Highlight syntax of blob
#
# options - A Hash of options (defaults to {})
#
# Returns html String
def colorize(options = {})
return unless safe_to_colorize?
options[:options] ||= {}
options[:options][:encoding] ||= encoding
lexer.highlight(data, options)
end
end
end

View File

@@ -57,14 +57,20 @@ module Linguist
#
# Returns a String.
def extension
# File.extname returns nil if the filename is an extension.
extension = File.extname(name)
basename = File.basename(name)
# Checks if the filename is an extension.
if extension.empty? && basename[0] == "."
basename
else
extension
extensions.last || ""
end
# Public: Return an array of the file extensions
#
# >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
# => [".html.erb", ".erb"]
#
# Returns an Array
def extensions
basename, *segments = File.basename(name).split(".")
segments.map.with_index do |segment, index|
"." + segments[index..-1].join(".")
end
end
end

View File

@@ -51,26 +51,25 @@ module Linguist
#
# Return true or false
def generated?
name == 'Gemfile.lock' ||
minified_files? ||
compiled_coffeescript? ||
xcode_file? ||
generated_parser? ||
generated_net_docfile? ||
generated_net_designer_file? ||
generated_postscript? ||
generated_protocol_buffer? ||
generated_jni_header? ||
composer_lock? ||
node_modules? ||
godeps? ||
vcr_cassette? ||
generated_by_zephir?
minified_files? ||
compiled_coffeescript? ||
xcode_file? ||
generated_parser? ||
generated_net_docfile? ||
generated_net_designer_file? ||
generated_postscript? ||
generated_protocol_buffer? ||
generated_jni_header? ||
composer_lock? ||
node_modules? ||
godeps? ||
vcr_cassette? ||
generated_by_zephir?
end
# Internal: Is the blob an Xcode file?
#
# Generated if the file extension is an Xcode
# Generated if the file extension is an Xcode
# file extension.
#
# Returns true of false.
@@ -265,4 +264,3 @@ module Linguist
end
end
end

13
lib/linguist/grammars.rb Normal file
View File

@@ -0,0 +1,13 @@
# Note: This file is included in the github-linguist-grammars gem, not the
# github-linguist gem.
module Linguist
module Grammars
# Get the path to the directory containing the language grammar JSON files.
#
# Returns a String.
def self.path
File.expand_path("../../../grammars", __FILE__)
end
end
end

View File

@@ -4,7 +4,7 @@ module Linguist
ACTIVE = true
def self.call(blob, languages)
find_by_heuristics(blob.data, langauges)
find_by_heuristics(blob.data, langauges.map(&:name))
end
# Public: Given an array of String language names,
@@ -17,23 +17,34 @@ module Linguist
# Returns an array of Languages or []
def self.find_by_heuristics(data, languages)
if active?
result = []
if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
result = disambiguate_pl(data, languages)
result = disambiguate_pl(data)
end
if languages.all? { |l| ["ECL", "Prolog"].include?(l) }
result = disambiguate_ecl(data, languages)
result = disambiguate_ecl(data)
end
if languages.all? { |l| ["IDL", "Prolog"].include?(l) }
result = disambiguate_pro(data, languages)
result = disambiguate_pro(data)
end
if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) }
result = disambiguate_cl(data, languages)
result = disambiguate_cl(data)
end
if languages.all? { |l| ["Hack", "PHP"].include?(l) }
result = disambiguate_hack(data)
end
if languages.all? { |l| ["Scala", "SuperCollider"].include?(l) }
result = disambiguate_sc(data, languages)
result = disambiguate_sc(data)
end
if languages.all? { |l| ["AsciiDoc", "AGS Script"].include?(l) }
result = disambiguate_asc(data, languages)
result = disambiguate_asc(data)
end
if languages.all? { |l| ["FORTRAN", "Forth"].include?(l) }
result = disambiguate_f(data)
end
if languages.all? { |l| ["F#", "Forth", "GLSL"].include?(l) }
result = disambiguate_fs(data)
end
return result
end
@@ -43,28 +54,37 @@ module Linguist
# We want to shortcut look for Objective-C _and_ now C++ too!
#
# Returns an array of Languages or []
def self.disambiguate_c(data, languages)
def self.disambiguate_c(data)
matches = []
matches << Language["Objective-C"] if data.include?("@interface")
matches << Language["C++"] if data.include?("#include <cstdint>")
if data.include?("@interface")
matches << Language["Objective-C"]
elsif data.include?("#include <cstdint>")
matches << Language["C++"]
end
matches
end
def self.disambiguate_pl(data, languages)
def self.disambiguate_pl(data)
matches = []
matches << Language["Prolog"] if data.include?(":-")
matches << Language["Perl"] if data.include?("use strict")
if data.include?("use strict")
matches << Language["Perl"]
elsif data.include?(":-")
matches << Language["Prolog"]
end
matches
end
def self.disambiguate_ecl(data, languages)
def self.disambiguate_ecl(data)
matches = []
matches << Language["Prolog"] if data.include?(":-")
matches << Language["ECL"] if data.include?(":=")
if data.include?(":-")
matches << Language["Prolog"]
elsif data.include?(":=")
matches << Language["ECL"]
end
matches
end
def self.disambiguate_pro(data, languages)
def self.disambiguate_pro(data)
matches = []
if (data.include?(":-"))
matches << Language["Prolog"]
@@ -74,7 +94,7 @@ module Linguist
matches
end
def self.disambiguate_ts(data, languages)
def self.disambiguate_ts(data)
matches = []
if (data.include?("</translation>"))
matches << Language["XML"]
@@ -84,21 +104,34 @@ module Linguist
matches
end
def self.disambiguate_cl(data, languages)
def self.disambiguate_cl(data)
matches = []
matches << Language["Common Lisp"] if data.include?("(defun ")
matches << Language["OpenCL"] if /\/\* |\/\/ |^\}/.match(data)
if data.include?("(defun ")
matches << Language["Common Lisp"]
elsif /\/\* |\/\/ |^\}/.match(data)
matches << Language["OpenCL"]
end
matches
end
def self.disambiguate_r(data, languages)
def self.disambiguate_r(data)
matches = []
matches << Language["Rebol"] if /\bRebol\b/i.match(data)
matches << Language["R"] if data.include?("<-")
matches
end
def self.disambiguate_sc(data, languages)
def self.disambiguate_hack(data)
matches = []
if data.include?("<?hh")
matches << Language["Hack"]
elsif /<?[^h]/.match(data)
matches << Language["PHP"]
end
matches
end
def self.disambiguate_sc(data)
matches = []
if (/\^(this|super)\./.match(data) || /^\s*(\+|\*)\s*\w+\s*{/.match(data) || /^\s*~\w+\s*=\./.match(data))
matches << Language["SuperCollider"]
@@ -109,12 +142,34 @@ module Linguist
matches
end
def self.disambiguate_asc(data, languages)
def self.disambiguate_asc(data)
matches = []
matches << Language["AsciiDoc"] if /^=+(\s|\n)/.match(data)
matches
end
def self.disambiguate_f(data)
matches = []
if /^: /.match(data)
matches << Language["Forth"]
elsif /^([c*][^a-z]| subroutine\s)/i.match(data)
matches << Language["FORTRAN"]
end
matches
end
def self.disambiguate_fs(data)
matches = []
if /^(: |new-device)/.match(data)
matches << Language["Forth"]
elsif /^(#light|import|let|module|namespace|open|type)/.match(data)
matches << Language["F#"]
elsif /^(#include|#pragma|precision|uniform|varying|void)/.match(data)
matches << Language["GLSL"]
end
matches
end
def self.active?
!!ACTIVE
end

View File

@@ -1,5 +1,4 @@
require 'escape_utils'
require 'pygments'
require 'yaml'
begin
require 'yajl'
@@ -62,7 +61,7 @@ module Linguist
end
# Language name index
@index[language.name] = @name_index[language.name] = language
@index[language.name.downcase] = @name_index[language.name.downcase] = language
language.aliases.each do |name|
# All Language aliases should be unique. Raise if there is a duplicate.
@@ -70,7 +69,7 @@ module Linguist
raise ArgumentError, "Duplicate alias: #{name}"
end
@index[name] = @alias_index[name] = language
@index[name.downcase] = @alias_index[name.downcase] = language
end
language.extensions.each do |extension|
@@ -112,8 +111,8 @@ module Linguist
#
# Returns Language or nil.
def self.detect(blob)
# Check if the blob is possibly binary and bail early.
return nil if blob.likely_binary? || blob.binary?
# Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty?
# Call each strategy until 0 or 1 candidates are returned
STRATEGIES.reduce([]) do |languages, strategy|
@@ -149,7 +148,7 @@ module Linguist
#
# Returns the Language or nil if none was found.
def self.find_by_name(name)
@name_index[name]
name && @name_index[name.downcase]
end
# Public: Look up Language by one of its aliases.
@@ -163,7 +162,7 @@ module Linguist
#
# Returns the Lexer or nil if none was found.
def self.find_by_alias(name)
@alias_index[name]
name && @alias_index[name.downcase]
end
# Public: Look up Languages by filename.
@@ -178,10 +177,31 @@ module Linguist
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
basename = File.basename(filename)
extname = FileBlob.new(filename).extension
langs = @filename_index[basename] +
@extension_index[extname]
langs.compact.uniq
# find the first extension with language definitions
extname = FileBlob.new(filename).extensions.detect do |e|
!@extension_index[e].empty?
end
(@filename_index[basename] + @extension_index[extname]).compact.uniq
end
# Public: Look up Languages by file extension.
#
# extname - The extension String.
#
# Examples
#
# Language.find_by_extension('.rb')
# # => [#<Language name="Ruby">]
#
# Language.find_by_extension('rb')
# # => [#<Language name="Ruby">]
#
# Returns all matching Languages or [] if none were found.
def self.find_by_extension(extname)
extname = ".#{extname}" unless extname.start_with?(".")
@extension_index[extname]
end
# Public: Look up Languages by shebang line.
@@ -212,7 +232,7 @@ module Linguist
#
# Returns the Language or nil if none was found.
def self.[](name)
@index[name]
name && @index[name.downcase]
end
# Public: A List of popular languages
@@ -271,10 +291,7 @@ module Linguist
# Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || [])
# Lookup Lexer object
@lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
raise(ArgumentError, "#{@name} is missing lexer")
# Load the TextMate scope name or try to guess one
@tm_scope = attributes[:tm_scope] || begin
context = case @type
when :data, :markup, :prose
@@ -406,11 +423,6 @@ module Linguist
# Returns the extensions Array
attr_reader :filenames
# Public: Return all possible extensions for language
def all_extensions
(extensions + [primary_extension]).uniq
end
# Deprecated: Get primary extension
#
# Defaults to the first extension but can be overridden
@@ -568,9 +580,9 @@ module Linguist
:ace_mode => options['ace_mode'],
:wrap => options['wrap'],
:group_name => options['group'],
:searchable => options.key?('searchable') ? options['searchable'] : true,
:searchable => options.fetch('searchable', true),
:search_term => options['search_term'],
:extensions => [options['extensions'].first] + options['extensions'][1..-1].sort,
:extensions => Array(options['extensions']),
:interpreters => options['interpreters'].sort,
:filenames => options['filenames'],
:popular => popular.include?(name)

File diff suppressed because it is too large Load Diff

View File

@@ -52,14 +52,16 @@ module Linguist
})
end
else
path = File.join(dirname, filename)
if File.extname(filename) == ""
raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
end
yield({
:path => File.join(dirname, filename),
:path => path,
:language => category,
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
:extname => File.extname(filename)
})
end
@@ -131,18 +133,19 @@ module Linguist
script = script == 'env' ? tokens[1] : script
# "python2.6" -> "python"
if script =~ /((?:\d+\.?)+)/
script.sub! $1, ''
end
# If script has an invalid shebang, we might get here
return unless script
# "python2.6" -> "python2"
script.sub! $1, '' if script =~ /(\.\d+)$/
# Check for multiline shebang hacks that call `exec`
if script == 'sh' &&
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
script = $1
end
script
File.basename(script)
else
nil
end

View File

@@ -8,8 +8,8 @@ module Linguist
# A bit of an elegant hack. If the file is executable but extensionless,
# append a "magic" extension so it can be classified with other
# languages that have shebang scripts.
extension = FileBlob.new(name).extension
if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
extensions = FileBlob.new(name).extensions
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
name += ".script!"
end

View File

@@ -36,15 +36,16 @@
# Go dependencies
- Godeps/_workspace/
# Bootstrap minified css and js
- (^|/)bootstrap([^.]*)(\.min)?\.(js|css)$
# Minified JavaScript and CSS
- (\.|-)min\.(js|css)$
# Bootstrap css and js
- (^|/)bootstrap([^.]*)\.(js|css)$
# Font Awesome
- font-awesome.min.css
- font-awesome.css
# Foundation css
- foundation.min.css
- foundation.css
# Normalize.css
@@ -56,7 +57,6 @@
# Animate.css
- animate.css
- animate.min.css
# Vendored dependencies
- third[-_]?party/
@@ -73,12 +73,12 @@
## Commonly Bundled JavaScript frameworks ##
# jQuery
- (^|/)jquery([^.]*)(\.min)?\.js$
- (^|/)jquery\-\d\.\d+(\.\d+)?(\.min)?\.js$
- (^|/)jquery([^.]*)\.js$
- (^|/)jquery\-\d\.\d+(\.\d+)?\.js$
# jQuery UI
- (^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?(\.min)?\.(js|css)$
- (^|/)jquery\.(ui|effects)\.([^.]*)(\.min)?\.(js|css)$
- (^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$
- (^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$
# Prototype
- (^|/)prototype(.*)\.js$
@@ -110,27 +110,32 @@
# MathJax
- (^|/)MathJax/
# Chart.js
- (^|/)Chart\.js$
# Codemirror
- (^|/)[Cc]ode[Mm]irror/(lib|mode|theme|addon|keymap)
# SyntaxHighlighter - http://alexgorbatchev.com/
- (^|/)shBrush([^.]*)\.js$
- (^|/)shCore\.js$
- (^|/)shLegacy\.js$
# AngularJS
- (^|/)angular([^.]*)(\.min)?\.js$
- (^|/)angular([^.]*)\.js$
# D3.js
- (^|\/)d3(\.v\d+)?([^.]*)(\.min)?\.js$
- (^|\/)d3(\.v\d+)?([^.]*)\.js$
# React
- (^|/)react(-[^.]*)?(\.min)?\.js$
- (^|/)react(-[^.]*)?\.js$
# Modernizr
- (^|/)modernizr\-\d\.\d+(\.\d+)?(\.min)?\.js$
- (^|/)modernizr\-\d\.\d+(\.\d+)?\.js$
- (^|/)modernizr\.custom\.\d+\.js$
# Knockout
- (^|/)knockout-(\d+\.){3}(debug\.)?js$
- knockout-min.js
## Python ##
@@ -168,8 +173,8 @@
- \.intellisense\.js$
# jQuery validation plugin (MS bundles this with asp.net mvc)
- (^|/)jquery([^.]*)\.validate(\.unobtrusive)?(\.min)?\.js$
- (^|/)jquery([^.]*)\.unobtrusive\-ajax(\.min)?\.js$
- (^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$
- (^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$
# Microsoft Ajax
- (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
@@ -196,7 +201,7 @@
- (^|/)extjs/welcome/
# Html5shiv
- (^|/)html5shiv(\.min)?\.js$
- (^|/)html5shiv\.js$
# Samples folders
- ^[Ss]amples/
@@ -215,8 +220,8 @@
- ^[Tt]est/fixtures/
# PhoneGap/Cordova
- (^|/)cordova([^.]*)(\.min)?\.js$
- (^|/)cordova\-\d\.\d(\.\d)?(\.min)?\.js$
- (^|/)cordova([^.]*)\.js$
- (^|/)cordova\-\d\.\d(\.\d)?\.js$
# Foundation js
- foundation(\..*)?\.js$
@@ -236,7 +241,6 @@
# Octicons
- octicons.css
- octicons.min.css
- sprockets-octicons.scss
# Typesafe Activator

View File

@@ -1,3 +1,3 @@
module Linguist
VERSION = "3.4.1"
VERSION = "4.0.3"
end