This commit is contained in:
Tim Baumann
2013-09-19 15:47:18 +02:00
39 changed files with 12038 additions and 320 deletions

View File

@@ -189,11 +189,10 @@ module Linguist
# Public: Is the blob safe to colorize?
#
# We use Pygments.rb for syntax highlighting blobs, which
# has some quirks and also is essentially 'un-killable' via
# normal timeout. To work around this we try to
# be careful about handing Pygments.rb anything it can't handle.
#
# We use Pygments for syntax highlighting blobs. Pygments
# can be too slow for very large blobs or for certain
# corner-case blobs.
#
# Return true or false
def safe_to_colorize?
!large? && text? && !high_ratio_of_long_lines?

View File

@@ -147,16 +147,20 @@ module Linguist
# indicate the least-likely language (and zero points) for each token.
def dump_all_tokens(tokens, languages)
maxlen = tokens.map { |tok| tok.size }.max
printf "%#{maxlen}s", ""
puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join
tokmap = Hash.new(0)
tokens.each { |tok| tokmap[tok] += 1 }
tokmap.sort.each { |tok, count|
arr = languages.map { |lang| [lang, token_probability(tok, lang)] }
min = arr.map { |a,b| b }.min
minlog = Math.log(min)
if !arr.inject(true) { |result, n| result && n[1] == arr[0][1] }
printf "%#{maxlen}s%5d", tok, count
puts arr.map { |ent|
ent[1] == min ? " -" : sprintf("%10.3f", count * (Math.log(ent[1]) - minlog))
}.join

View File

@@ -52,11 +52,12 @@ module Linguist
# Return true or false
def generated?
  # Each predicate below recognises one kind of machine-produced file.
  # The chain short-circuits on the first truthy result, so every check
  # needs to appear only once: repeating a predicate (as
  # generated_net_docfile? previously was) can never change the outcome
  # and only repeats the work.
  name == 'Gemfile.lock' ||
    minified_javascript? ||
    minified_files? ||
    compiled_coffeescript? ||
    xcode_project_file? ||
    generated_net_docfile? ||
    generated_parser? ||
    generated_net_designer_file? ||
    generated_protocol_buffer?
end
@@ -70,16 +71,16 @@ module Linguist
['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
end
# Internal: Is the blob minified JS?
# Internal: Is the blob a minified file?
#
# Consider JS minified if the average line length is
# greater than 100c.
# Consider a file minified if less than 5% of its characters are whitespace.
# Currently, only JS and CSS files are detected by this method.
#
# Returns true or false.
def minified_javascript?
return unless extname == '.js'
if lines.any?
(lines.inject(0) { |n, l| n += l.length } / lines.length) > 100
def minified_files?
return unless ['.js', '.css'].include? extname
if data && data.length > 200
(data.each_char.count{ |c| c <= ' ' } / data.length.to_f) < 0.05
else
false
end
@@ -143,6 +144,16 @@ module Linguist
lines[-2].include?("</doc>")
end
# Internal: Is this a codegen file for a .NET project?
#
# Visual Studio often uses code generation to generate partial classes, and
# these files can be quite unwieldy. Let's hide them.
#
# Returns true or false
def generated_net_designer_file?
  # Case-insensitive check for a ".designer.cs" suffix on the blob name.
  # `=~` yields a match offset or nil, so coerce the result to a strict
  # boolean to honour the documented "Returns true or false" contract.
  !!(name.downcase =~ /\.designer\.cs$/)
end
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# PEG.js-generated parsers are not meant to be consumed by humans.

View File

@@ -21,6 +21,13 @@ module Linguist
# Valid Languages types
TYPES = [:data, :markup, :programming]
# Names of non-programming languages that we will still detect
#
# Returns an array
def self.detectable_markup
  # Language names that are still detected even though they are not
  # programming languages; a fresh array is built on every call.
  %w[CSS Less Sass]
end
# Internal: Create a new Language object
#
# attributes - A hash of attributes
@@ -445,8 +452,6 @@ module Linguist
extnames.each do |extname|
if !options['extensions'].include?(extname)
options['extensions'] << extname
else
warn "#{name} #{extname.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end
@@ -455,8 +460,6 @@ module Linguist
fns.each do |filename|
if !options['filenames'].include?(filename)
options['filenames'] << filename
else
warn "#{name} #{filename.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end

View File

@@ -218,8 +218,18 @@ CMake:
filenames:
- CMakeLists.txt
COBOL:
type: programming
primary_extension: .cob
extensions:
- .cbl
- .ccp
- .cobol
- .cpy
CSS:
ace_mode: css
color: "#1f085e"
primary_extension: .css
Ceylon:
@@ -238,6 +248,7 @@ Clojure:
primary_extension: .clj
extensions:
- .cljs
- .cljx
filenames:
- riemann.config
@@ -278,6 +289,7 @@ Common Lisp:
- .asd
- .lsp
- .ny
- .podsl
Coq:
type: programming
@@ -334,14 +346,6 @@ Dart:
type: programming
primary_extension: .dart
Delphi:
type: programming
color: "#b0ce4e"
primary_extension: .pas
extensions:
- .dfm
- .lpr
DCPU-16 ASM:
type: programming
lexer: dasm16
@@ -405,7 +409,7 @@ Emacs Lisp:
Erlang:
type: programming
color: "#949e0e"
color: "#0faf8d"
primary_extension: .erl
extensions:
- .hrl
@@ -414,7 +418,9 @@ F#:
type: programming
lexer: FSharp
color: "#b845fc"
search_term: ocaml
search_term: fsharp
aliases:
- fsharp
primary_extension: .fs
extensions:
- .fsi
@@ -479,6 +485,18 @@ GAS:
extensions:
- .S
GLSL:
group: C
type: programming
primary_extension: .glsl
extensions:
- .fp
- .frag
- .geom
- .glslv
- .shader
- .vert
Genshi:
primary_extension: .kid
@@ -503,7 +521,7 @@ Gettext Catalog:
Go:
type: programming
color: "#8d04eb"
color: "#a89b4d"
primary_extension: .go
Gosu:
@@ -607,7 +625,6 @@ Haxe:
INI:
type: data
extensions:
- .cfg
- .ini
- .prefs
- .properties
@@ -686,6 +703,13 @@ Kotlin:
- .ktm
- .kts
LFE:
type: programming
primary_extension: .lfe
color: "#004200"
lexer: Common Lisp
group: Erlang
LLVM:
primary_extension: .ll
@@ -758,6 +782,8 @@ Logos:
Logtalk:
type: programming
primary_extension: .lgt
extensions:
- .logtalk
Lua:
type: programming
@@ -766,6 +792,7 @@ Lua:
primary_extension: .lua
extensions:
- .nse
- .rbxs
M:
type: programming
@@ -987,6 +1014,15 @@ Parrot Assembly:
- pasm
primary_extension: .pasm
Pascal:
type: programming
lexer: Delphi
color: "#b0ce4e"
primary_extension: .pas
extensions:
- .dfm
- .lpr
Perl:
type: programming
ace_mode: perl
@@ -994,6 +1030,7 @@ Perl:
primary_extension: .pl
extensions:
- .PL
- .nqp
- .perl
- .ph
- .plx
@@ -1146,6 +1183,7 @@ Ruby:
- .thor
- .watchr
filenames:
- Berksfile
- Gemfile
- Guardfile
- Podfile
@@ -1216,6 +1254,13 @@ Shell:
primary_extension: .sh
extensions:
- .tmux
filenames:
- Dockerfile
Slash:
type: programming
color: "#007eff"
primary_extension: .sl
Smalltalk:
type: programming
@@ -1225,6 +1270,11 @@ Smalltalk:
Smarty:
primary_extension: .tpl
Squirrel:
type: programming
lexer: C++
primary_extension: .nut
Standard ML:
type: programming
color: "#dc566d"
@@ -1312,8 +1362,8 @@ Unified Parallel C:
lexer: C
ace_mode: c_cpp
color: "#755223"
primary_extension: .upc
primary_extension: .upc
VHDL:
type: programming
lexer: vhdl
@@ -1359,12 +1409,16 @@ Visual Basic:
- .vba
- .vbs
Volt:
type: programming
lexer: D
color: "#0098db"
primary_extension: .volt
XC:
type: programming
lexer: C
primary_extension: .xc
extensions:
- .xc
XML:
type: markup
@@ -1402,9 +1456,11 @@ XML:
- .wxi
- .wxl
- .wxs
- .x3d
- .xaml
- .xlf
- .xliff
- .xmi
- .xsd
- .xul
- .zcml
@@ -1444,7 +1500,7 @@ Xtend:
primary_extension: .xtend
YAML:
type: markup
type: data
aliases:
- yml
primary_extension: .yml

View File

@@ -73,8 +73,8 @@ module Linguist
# Skip vendored or generated blobs
next if blob.vendored? || blob.generated? || blob.language.nil?
# Only include programming languages
if blob.language.type == :programming
# Only include programming languages and acceptable markup languages
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
@sizes[blob.language.group] += blob.size
end
end

File diff suppressed because it is too large Load Diff

View File

@@ -24,6 +24,9 @@
# Node dependencies
- node_modules/
# Erlang bundles
- ^rebar$
# Vendored dependencies
- vendor/
@@ -59,10 +62,6 @@
- (^|/)yahoo-([^.]*)\.js$
- (^|/)yui([^.]*)\.js$
# LESS css
- (^|/)less([^.]*)(\.min)?\.js$
- (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
# WYS editors
- (^|/)ckeditor\.js$
- (^|/)tiny_mce([^.]*)\.js$
@@ -114,5 +113,17 @@
# Samples folders
- ^[Ss]amples/
# LICENSE, README, git config files
- ^COPYING$
- ^LICENSE$
- gitattributes$
- gitignore$
- gitmodules$
- ^README$
- ^readme$
# Test fixtures
- ^[Tt]est/fixtures/
# .DS_Store's
- .[Dd][Ss]_[Ss]tore$