Merge github.com:github/linguist

Conflicts:
	lib/linguist/vendor.yml
This commit is contained in:
Chris Kuehl
2013-11-04 20:32:52 -08:00
53 changed files with 13498 additions and 470 deletions

View File

@@ -70,7 +70,7 @@ module Linguist
#
# Return true or false
def likely_binary?
  # Language.find_by_filename returns an Array ([] when nothing matches).
  # An Array is always truthy in Ruby, so negating it directly can never
  # yield true. Test for an empty result instead; Array() also copes with
  # a nil result.
  binary_mime_type? && Array(Language.find_by_filename(name)).empty?
end
# Public: Get the Content-Type header value
@@ -189,11 +189,10 @@ module Linguist
# Public: Is the blob safe to colorize?
#
# We use Pygments.rb for syntax highlighting blobs, which
# has some quirks and is also essentially 'un-killable' via a
# normal timeout. To work around this, we try to avoid handing
# Pygments.rb anything it can't handle.
#
# We use Pygments for syntax highlighting blobs. Pygments
# can be too slow for very large blobs or for certain
# corner-case blobs.
#
# Return true or false
def safe_to_colorize?
!large? && text? && !high_ratio_of_long_lines?
@@ -278,36 +277,6 @@ module Linguist
@_generated ||= Generated.generated?(name, lambda { data })
end
# Public: Should the blob be indexed for searching?
#
# A blob is excluded when any of these holds:
#   - it is larger than 100KB (100 * 1024 bytes)
#   - it is binary
#   - no language was detected for it
#   - its language is marked as not searchable
#   - it is a generated source file
#
# A '.txt' file that passes the size and binary checks is always indexed,
# whatever its language.
#
# Please keep `test/test_blob.rb#test_indexable` in step with any change
# made here.
#
# Return true or false
def indexable?
  return false if size > 100 * 1024
  return false if binary?
  return true if extname == '.txt'
  return false if language.nil?
  return false unless language.searchable?

  !generated?
end
# Public: Detects the Language of the blob.
#
# May load Blob#data
@@ -343,19 +312,5 @@ module Linguist
options[:options][:encoding] ||= encoding
lexer.highlight(data, options)
end
# Public: Highlight the blob's syntax and strip the outer
# `<div class="highlight"><pre>` wrapper from the result.
#
# options - A Hash of options handed to #colorize (defaults to {})
#
# Returns the html String found inside the wrapper, '' when #colorize
# returns nothing, or nil when the wrapper is not present in the html.
def colorize_without_wrapper(options = {})
  html = colorize(options)
  return '' unless html

  html[%r{<div class="highlight"><pre>(.*?)</pre>\s*</div>}m, 1]
end
end
end

View File

@@ -130,38 +130,42 @@ module Linguist
Math.log(@languages[language].to_f / @languages_total.to_f)
end
private
# Internal: Debug verbosity level, read from the LINGUIST_DEBUG
# environment variable on first use and cached in @verbosity.
#
# Returns an Integer (0 when the variable is not set).
def verbosity
  @verbosity ||= ENV.fetch('LINGUIST_DEBUG', 0).to_i
end
private
# Internal: Debug verbosity level, read from the LINGUIST_DEBUG
# environment variable on first use and cached in @verbosity.
#
# Returns an Integer (0 when the variable is not set).
def verbosity
  @verbosity ||= ENV.fetch('LINGUIST_DEBUG', 0).to_i
end
# Internal: Print a table of per-token scores for each language to stdout.
#
# Each cell is the number of "points" a token adds toward the belief that
# the file under test is written in that language; points are additive.
# A cell is the token's occurrence count times the log of the ratio
# between its probability in that language and in the least-likely
# language. The least-likely language scores zero and is shown as a dash.
# Tokens whose probability is identical for every language are not printed.
#
# tokens    - the tokens found in the file under test
# languages - the languages to compare, each passed to token_probability
def dump_all_tokens(tokens, languages)
  width = tokens.map(&:size).max
  printf "%#{width}s", ""
  puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join

  counts = Hash.new(0)
  tokens.each { |tok| counts[tok] += 1 }

  counts.sort.each do |tok, count|
    probs = languages.map { |lang| token_probability(tok, lang) }
    lowest = probs.min
    lowest_log = Math.log(lowest)
    # A token that is equally likely everywhere carries no information.
    next if probs.all? { |prob| prob == probs[0] }

    printf "%#{width}s%5d", tok, count
    cells = probs.map do |prob|
      prob == lowest ? " -" : sprintf("%10.3f", count * (Math.log(prob) - lowest_log))
    end
    puts cells.join
  end
end
# Internal: Print a table of per-token scores for each language to stdout.
#
# Each cell is the number of "points" a token adds toward the belief that
# the file under test is written in that language; points are additive.
# A cell is the token's occurrence count times the log of the ratio
# between its probability in that language and in the least-likely
# language. The least-likely language scores zero and is shown as a dash.
# Tokens whose probability is identical for every language are not printed.
#
# tokens    - the tokens found in the file under test
# languages - the languages to compare, each passed to token_probability
def dump_all_tokens(tokens, languages)
  width = tokens.map(&:size).max
  printf "%#{width}s", ""
  puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join

  counts = Hash.new(0)
  tokens.each { |tok| counts[tok] += 1 }

  counts.sort.each do |tok, count|
    probs = languages.map { |lang| token_probability(tok, lang) }
    lowest = probs.min
    lowest_log = Math.log(lowest)
    # A token that is equally likely everywhere carries no information.
    next if probs.all? { |prob| prob == probs[0] }

    printf "%#{width}s%5d", tok, count
    cells = probs.map do |prob|
      prob == lowest ? " -" : sprintf("%10.3f", count * (Math.log(prob) - lowest_log))
    end
    puts cells.join
  end
end
end
end

View File

@@ -52,12 +52,14 @@ module Linguist
# Return true or false
def generated?
  # A single `||` chain: the first truthy check decides the result. Every
  # check is listed once and every line but the last ends in `||`, so no
  # part of the chain is evaluated and then discarded.
  name == 'Gemfile.lock' ||
    minified_files? ||
    compiled_coffeescript? ||
    xcode_project_file? ||
    generated_parser? ||
    generated_net_docfile? ||
    generated_net_designer_file? ||
    generated_protocol_buffer? ||
    generated_jni_header?
end
# Internal: Is the blob an XCode project file?
@@ -70,16 +72,16 @@ module Linguist
['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
end
# Internal: Is the blob minified JS?
# Internal: Is the blob a minified file?
#
# Consider JS minified if the average line length is
# greater than 100 characters.
# Consider a file minified if it is longer than 200 characters and
# fewer than 5% of its characters are whitespace.
# Currently, only JS and CSS files are detected by this method.
#
# Returns true or false.
def minified_javascript?
return unless extname == '.js'
if lines.any?
(lines.inject(0) { |n, l| n += l.length } / lines.length) > 100
def minified_files?
return unless ['.js', '.css'].include? extname
if data && data.length > 200
(data.each_char.count{ |c| c <= ' ' } / data.length.to_f) < 0.05
else
false
end
@@ -143,6 +145,16 @@ module Linguist
lines[-2].include?("</doc>")
end
# Internal: Is this a codegen file for a .NET project?
#
# Visual Studio often uses code generation to generate partial classes, and
# these files can be quite unwieldy. Let's hide them.
#
# The match on the '.designer.cs' suffix is case-insensitive.
#
# Returns true or false
def generated_net_designer_file?
  # end_with? yields a real boolean (`=~` gives an Integer offset or nil)
  # and only matches at the very end of the name, whereas a `$` anchor
  # also matches just before a newline.
  name.downcase.end_with?('.designer.cs')
end
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# PEG.js-generated parsers are not meant to be consumed by humans.
@@ -170,5 +182,16 @@ module Linguist
return lines[0].include?("Generated by the protocol buffer compiler. DO NOT EDIT!")
end
# Internal: Is the blob a C/C++ header generated by the Java JNI tool javah?
#
# The blob must have a '.h' extension, more than two lines, the
# "DO NOT EDIT" banner on its first line and `#include <jni.h>` on its
# second line.
#
# Returns true or false.
def generated_jni_header?
  return false unless extname == '.h'
  return false unless lines.count > 2

  # Both markers are required. Returning straight after the banner check
  # would leave the jni.h check unreachable and flag any header that merely
  # carries the banner.
  lines[0].include?("/* DO NOT EDIT THIS FILE - it is machine generated */") &&
    lines[1].include?("#include <jni.h>")
end
end
end

View File

@@ -15,12 +15,21 @@ module Linguist
@index = {}
@name_index = {}
@alias_index = {}
@extension_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
@extension_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
@primary_extension_index = {}
# Valid Languages types
TYPES = [:data, :markup, :programming]
# Names of the non-programming languages that are still detected
#
# Returns an Array of language name Strings
def self.detectable_markup
  %w[CSS Less Sass]
end
# Internal: Create a new Language object
#
# attributes - A hash of attributes
@@ -56,6 +65,12 @@ module Linguist
@extension_index[extension] << language
end
if @primary_extension_index.key?(language.primary_extension)
raise ArgumentError, "Duplicate primary extension: #{language.primary_extension}"
end
@primary_extension_index[language.primary_extension] = language
language.filenames.each do |filename|
@filename_index[filename] << language
end
@@ -141,7 +156,10 @@ module Linguist
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
  basename, extname = File.basename(filename), File.extname(filename)

  # Read with fetch rather than []: the filename and extension indexes are
  # created with a default block that stores a new Array on every miss, so
  # a plain [] lookup would add an empty entry for each unknown name queried.
  primary = @primary_extension_index[extname]
  by_name = @filename_index.fetch(basename, [])
  by_ext  = @extension_index.fetch(extname, [])

  # The language owning the extension as its primary one comes first;
  # compact drops it when there is none and uniq removes its repeat among
  # the plain extension matches.
  ([primary] + by_name + by_ext).compact.uniq
end
# Public: Look up Language by its name or lexer.
@@ -445,8 +463,6 @@ module Linguist
extnames.each do |extname|
if !options['extensions'].include?(extname)
options['extensions'] << extname
else
warn "#{name} #{extname.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end
@@ -455,8 +471,6 @@ module Linguist
fns.each do |filename|
if !options['filenames'].include?(filename)
options['filenames'] << filename
else
warn "#{name} #{filename.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end

View File

@@ -28,7 +28,13 @@ ABAP:
type: programming
lexer: ABAP
primary_extension: .abap
ANTLR:
type: programming
color: "#9DC3FF"
lexer: ANTLR
primary_extension: .g4
ASP:
type: programming
color: "#6a40fd"
@@ -61,7 +67,7 @@ Ada:
primary_extension: .adb
extensions:
- .ads
ApacheConf:
type: markup
aliases:
@@ -137,6 +143,11 @@ Befunge:
BlitzMax:
primary_extension: .bmx
Bluespec:
type: programming
lexer: verilog
primary_extension: .bsv
Boo:
type: programming
color: "#d4bec1"
@@ -212,8 +223,18 @@ CMake:
filenames:
- CMakeLists.txt
COBOL:
type: programming
primary_extension: .cob
extensions:
- .cbl
- .ccp
- .cobol
- .cpy
CSS:
ace_mode: css
color: "#1f085e"
primary_extension: .css
Ceylon:
@@ -232,6 +253,7 @@ Clojure:
primary_extension: .clj
extensions:
- .cljs
- .cljx
filenames:
- riemann.config
@@ -270,8 +292,10 @@ Common Lisp:
primary_extension: .lisp
extensions:
- .asd
- .cl
- .lsp
- .ny
- .podsl
Coq:
type: programming
@@ -308,6 +332,14 @@ D-ObjDump:
type: data
lexer: d-objdump
primary_extension: .d-objdump
DM:
type: programming
color: "#075ff1"
lexer: Text only
primary_extension: .dm
aliases:
- byond
DOT:
type: programming
@@ -328,14 +360,6 @@ Dart:
type: programming
primary_extension: .dart
Delphi:
type: programming
color: "#b0ce4e"
primary_extension: .pas
extensions:
- .dfm
- .lpr
DCPU-16 ASM:
type: programming
lexer: dasm16
@@ -344,7 +368,7 @@ DCPU-16 ASM:
- .dasm
aliases:
- dasm16
Diff:
primary_extension: .diff
@@ -383,7 +407,6 @@ Elixir:
Elm:
type: programming
lexer: Haskell
group: Haskell
primary_extension: .elm
Emacs Lisp:
@@ -399,7 +422,7 @@ Emacs Lisp:
Erlang:
type: programming
color: "#949e0e"
color: "#0faf8d"
primary_extension: .erl
extensions:
- .hrl
@@ -408,7 +431,9 @@ F#:
type: programming
lexer: FSharp
color: "#b845fc"
search_term: ocaml
search_term: fsharp
aliases:
- fsharp
primary_extension: .fs
extensions:
- .fsi
@@ -473,6 +498,18 @@ GAS:
extensions:
- .S
GLSL:
group: C
type: programming
primary_extension: .glsl
extensions:
- .fp
- .frag
- .geom
- .glslv
- .shader
- .vert
Genshi:
primary_extension: .kid
@@ -495,9 +532,15 @@ Gettext Catalog:
extensions:
- .pot
Glyph:
type: programming
color: "#e4cc98"
lexer: Tcl
primary_extension: .glf
Go:
type: programming
color: "#8d04eb"
color: "#a89b4d"
primary_extension: .go
Gosu:
@@ -581,6 +624,10 @@ Handlebars:
type: markup
lexer: Text only
primary_extension: .handlebars
extensions:
- .hbs
- .html.handlebars
- .html.hbs
Haskell:
type: programming
@@ -601,12 +648,15 @@ Haxe:
INI:
type: data
extensions:
- .cfg
- .ini
- .prefs
- .properties
primary_extension: .ini
Inno Setup:
primary_extension: .iss
lexer: Text only
IRC log:
lexer: IRC logs
search_term: irc
@@ -626,12 +676,30 @@ Ioke:
color: "#078193"
primary_extension: .ik
J:
type: programming
lexer: Text only
primary_extension: .ijs
JSON:
type: data
group: JavaScript
ace_mode: json
searchable: false
primary_extension: .json
extensions:
- .sublime-keymap
- .sublime_metrics
- .sublime-mousemap
- .sublime-project
- .sublime_session
- .sublime-settings
- .sublime-workspace
Jade:
group: HTML
type: markup
primary_extension: .jade
Java:
type: programming
@@ -672,6 +740,7 @@ JavaScript:
Julia:
type: programming
primary_extension: .jl
color: "#a270ba"
Kotlin:
type: programming
@@ -680,6 +749,13 @@ Kotlin:
- .ktm
- .kts
LFE:
type: programming
primary_extension: .lfe
color: "#004200"
lexer: Common Lisp
group: Erlang
LLVM:
primary_extension: .ll
@@ -745,6 +821,8 @@ Logos:
Logtalk:
type: programming
primary_extension: .lgt
extensions:
- .logtalk
Lua:
type: programming
@@ -753,13 +831,16 @@ Lua:
primary_extension: .lua
extensions:
- .nse
- .rbxs
M:
type: programming
lexer: Common Lisp
aliases:
- mumps
primary_extension: .m
primary_extension: .mumps
extensions:
- .m
Makefile:
aliases:
@@ -974,6 +1055,15 @@ Parrot Assembly:
- pasm
primary_extension: .pasm
Pascal:
type: programming
lexer: Delphi
color: "#b0ce4e"
primary_extension: .pas
extensions:
- .dfm
- .lpr
Perl:
type: programming
ace_mode: perl
@@ -981,6 +1071,7 @@ Perl:
primary_extension: .pl
extensions:
- .PL
- .nqp
- .perl
- .ph
- .plx
@@ -1008,6 +1099,9 @@ PowerShell:
aliases:
- posh
primary_extension: .ps1
extensions:
- .psd1
- .psm1
Processing:
type: programming
@@ -1022,6 +1116,13 @@ Prolog:
extensions:
- .pro
Protocol Buffer:
type: markup
aliases:
- protobuf
- Protocol Buffers
primary_extension: .proto
Puppet:
type: programming
color: "#cc5555"
@@ -1044,6 +1145,7 @@ Python:
primary_extension: .py
extensions:
- .gyp
- .pyt
- .pyw
- .wsgi
- .xpy
@@ -1062,6 +1164,19 @@ R:
color: "#198ce7"
lexer: S
primary_extension: .r
filenames:
- .Rprofile
REALbasic:
type: programming
lexer: VB.net
primary_extension: .rbbas
extensions:
- .rbfrm
- .rbmnu
- .rbres
- .rbtbar
- .rbuistate
RHTML:
type: markup
@@ -1124,6 +1239,7 @@ Ruby:
- .gemspec
- .god
- .irbrc
- .mspec
- .podspec
- .rbuild
- .rbw
@@ -1132,6 +1248,7 @@ Ruby:
- .thor
- .watchr
filenames:
- Berksfile
- Gemfile
- Guardfile
- Podfile
@@ -1172,6 +1289,11 @@ Scala:
color: "#7dd3b0"
primary_extension: .scala
Scaml:
group: HTML
type: markup
primary_extension: .scaml
Scheme:
type: programming
color: "#1e4aec"
@@ -1201,7 +1323,15 @@ Shell:
- zsh
primary_extension: .sh
extensions:
- .bats
- .tmux
filenames:
- Dockerfile
Slash:
type: programming
color: "#007eff"
primary_extension: .sl
Smalltalk:
type: programming
@@ -1211,6 +1341,11 @@ Smalltalk:
Smarty:
primary_extension: .tpl
Squirrel:
type: programming
lexer: C++
primary_extension: .nut
Standard ML:
type: programming
color: "#dc566d"
@@ -1237,6 +1372,8 @@ Tcl:
type: programming
color: "#e4cc98"
primary_extension: .tcl
extensions:
- .adp
Tcsh:
type: programming
@@ -1253,6 +1390,7 @@ TeX:
primary_extension: .tex
extensions:
- .aux
- .bib
- .dtx
- .ins
- .ltx
@@ -1287,7 +1425,6 @@ Twig:
TypeScript:
type: programming
color: "#31859c"
lexer: Text only
aliases:
- ts
primary_extension: .ts
@@ -1298,8 +1435,8 @@ Unified Parallel C:
lexer: C
ace_mode: c_cpp
color: "#755223"
primary_extension: .upc
primary_extension: .upc
VHDL:
type: programming
lexer: vhdl
@@ -1345,6 +1482,17 @@ Visual Basic:
- .vba
- .vbs
Volt:
type: programming
lexer: D
color: "#0098db"
primary_extension: .volt
XC:
type: programming
lexer: C
primary_extension: .xc
XML:
type: markup
ace_mode: xml
@@ -1356,14 +1504,18 @@ XML:
extensions:
- .axml
- .ccxml
- .clixml
- .dita
- .ditamap
- .ditaval
- .glade
- .grxml
- .jelly
- .kml
- .mxml
- .plist
- .ps1xml
- .psc1
- .pt
- .rdf
- .rss
@@ -1373,6 +1525,7 @@ XML:
- .tmLanguage
- .tmPreferences
- .tmSnippet
- .tmTheme
- .tml
- .ui
- .vxml
@@ -1380,9 +1533,11 @@ XML:
- .wxi
- .wxl
- .wxs
- .x3d
- .xaml
- .xlf
- .xliff
- .xmi
- .xsd
- .xul
- .zcml
@@ -1403,6 +1558,8 @@ XQuery:
primary_extension: .xquery
extensions:
- .xq
- .xql
- .xqm
- .xqy
XS:
@@ -1422,7 +1579,7 @@ Xtend:
primary_extension: .xtend
YAML:
type: markup
type: data
aliases:
- yml
primary_extension: .yml

View File

@@ -73,8 +73,8 @@ module Linguist
# Skip vendored or generated blobs
next if blob.vendored? || blob.generated? || blob.language.nil?
# Only include programming languages
if blob.language.type == :programming
# Only include programming languages and acceptable markup languages
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
@sizes[blob.language.group] += blob.size
end
end

File diff suppressed because it is too large Load Diff

View File

@@ -24,8 +24,15 @@
# Node dependencies
- node_modules/
# Erlang bundles
- ^rebar$
# Bootstrap minified css and js
- (^|/)bootstrap([^.]*)(\.min)\.(js|css)$
# Vendored dependencies
- vendor/
- thirdparty/
- vendors?/
# Debian packaging
- ^debian/
@@ -34,7 +41,11 @@
# jQuery
- (^|/)jquery([^.]*)(\.min)?\.js$
- (^|/)jquery\-\d\.\d(\.\d)?(\.min)?\.js$
- (^|/)jquery\-\d\.\d+(\.\d+)?(\.min)?\.js$
# jQuery UI
- (^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?(\.min)?\.(js|css)$
- (^|/)jquery\.(ui|effects)\.([^.]*)(\.min)?\.(js|css)$
# Prototype
- (^|/)prototype(.*)\.js$
@@ -55,10 +66,6 @@
- (^|/)yahoo-([^.]*)\.js$
- (^|/)yui([^.]*)\.js$
# LESS css
- (^|/)less([^.]*)(\.min)?\.js$
- (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
# WYS editors
- (^|/)ckeditor\.js$
- (^|/)tiny_mce([^.]*)\.js$
@@ -95,7 +102,8 @@
- -vsdoc\.js$
# jQuery validation plugin (MS bundles this with asp.net mvc)
- (^|/)jquery([^.]*)\.validate(\.min)?\.js$
- (^|/)jquery([^.]*)\.validate(\.unobtrusive)?(\.min)?\.js$
- (^|/)jquery([^.]*)\.unobtrusive\-ajax(\.min)?\.js$
# Microsoft Ajax
- (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
@@ -104,14 +112,41 @@
- ^[Pp]ackages/
# ExtJS
- (^|/)extjs/
- (^|/)extjs/.*?\.js$
- (^|/)extjs/.*?\.xml$
- (^|/)extjs/.*?\.txt$
- (^|/)extjs/.*?\.html$
- (^|/)extjs/.*?\.properties$
- (^|/)extjs/.sencha/
- (^|/)extjs/docs/
- (^|/)extjs/builds/
- (^|/)extjs/cmd/
- (^|/)extjs/examples/
- (^|/)extjs/locale/
- (^|/)extjs/packages/
- (^|/)extjs/plugins/
- (^|/)extjs/resources/
- (^|/)extjs/src/
- (^|/)extjs/welcome/
# Samples folders
- ^[Ss]amples/
# LICENSE, README, git config files
- ^COPYING$
- ^LICENSE$
- gitattributes$
- gitignore$
- gitmodules$
- ^README$
- ^readme$
# Test fixtures
- ^[Tt]est/fixtures/
# PhoneGap/Cordova
- (^|/)cordova([^.]*)(\.min)?\.js$
- (^|/)cordova\-\d\.\d(\.\d)?(\.min)?\.js$
# .DS_Store's
- .[Dd][Ss]_[Ss]tore$