Merge branch 'master' into more-687

This commit is contained in:
Ted Nyman
2013-12-06 20:32:22 -08:00
19 changed files with 2020 additions and 67 deletions

View File

@@ -15,8 +15,8 @@ module Linguist
#
# Returns nothing.
#
# Set LINGUIST_DEBUG=1 or =2 to see probabilities per-token,
# per-language. See also dump_all_tokens, below.
# Set LINGUIST_DEBUG=1 or =2 to see probabilities per-token or
# per-language. See also #dump_all_tokens, below.
def self.train!(db, language, data)
tokens = Tokenizer.tokenize(data)
@@ -151,10 +151,10 @@ module Linguist
printf "%#{maxlen}s", ""
puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join
tokmap = Hash.new(0)
tokens.each { |tok| tokmap[tok] += 1 }
token_map = Hash.new(0)
tokens.each { |tok| token_map[tok] += 1 }
tokmap.sort.each { |tok, count|
token_map.sort.each { |tok, count|
arr = languages.map { |lang| [lang, token_probability(tok, lang)] }
min = arr.map { |a,b| b }.min
minlog = Math.log(min)

View File

@@ -75,14 +75,16 @@ module Linguist
# Internal: Is the blob a minified file?
#
# Consider a file minified if it contains more than 5% spaces.
# Consider a file minified if the average line length is
# greater than 110 characters.
#
# Currently, only JS and CSS files are detected by this method.
#
# Returns true or false.
def minified_files?
return unless ['.js', '.css'].include? extname
if data && data.length > 200
(data.each_char.count{ |c| c <= ' ' } / data.length.to_f) < 0.05
if lines.any?
(lines.inject(0) { |n, l| n += l.length } / lines.length) > 110
else
false
end
@@ -191,8 +193,8 @@ module Linguist
return false unless extname == '.h'
return false unless lines.count > 2
return lines[0].include?("/* DO NOT EDIT THIS FILE - it is machine generated */")
return lines[1].include?("#include <jni.h>")
return lines[0].include?("/* DO NOT EDIT THIS FILE - it is machine generated */") &&
lines[1].include?("#include <jni.h>")
end
# node_modules/ can contain large amounts of files, in general not meant

View File

@@ -1,6 +1,10 @@
require 'escape_utils'
require 'pygments'
require 'yaml'
begin
require 'json'
rescue LoadError
end
require 'linguist/classifier'
require 'linguist/samples'
@@ -487,7 +491,16 @@ module Linguist
filenames = Samples::DATA['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
languages_yml = File.expand_path("../languages.yml", __FILE__)
languages_json = File.expand_path("../languages.json", __FILE__)
if File.exist?(languages_json) && defined?(JSON)
languages = JSON.load(File.read(languages_json))
else
languages = YAML.load_file(languages_yml)
end
languages.each do |name, options|
options['extensions'] ||= []
options['interpreters'] ||= []
options['filenames'] ||= []

View File

@@ -29,13 +29,13 @@ ABAP:
type: programming
lexer: ABAP
primary_extension: .abap
ANTLR:
type: programming
color: "#9DC3FF"
lexer: ANTLR
primary_extension: .g4
ASP:
type: programming
color: "#6a40fd"
@@ -71,6 +71,7 @@ Ada:
Agda:
type: programming
color: "#467C91"
primary_extension: .agda
ApacheConf:
@@ -123,6 +124,15 @@ AutoHotkey:
- ahk
primary_extension: .ahk
AutoIt:
type: programming
color: "#36699B"
aliases:
- au3
- AutoIt3
- AutoItScript
primary_extension: .au3
Awk:
type: programming
lexer: Awk
@@ -178,6 +188,11 @@ Brainfuck:
extensions:
- .bf
Brightscript:
type: programming
lexer: Text only
primary_extension: .brs
Bro:
type: programming
primary_extension: .bro
@@ -378,11 +393,11 @@ D-ObjDump:
type: data
lexer: d-objdump
primary_extension: .d-objdump
DM:
type: programming
color: "#075ff1"
lexer: Text only
lexer: C++
primary_extension: .dm
aliases:
- byond
@@ -404,6 +419,7 @@ Darcs Patch:
Dart:
type: programming
color: "#98BAD6"
primary_extension: .dart
DCPU-16 ASM:
@@ -414,7 +430,7 @@ DCPU-16 ASM:
- .dasm
aliases:
- dasm16
Diff:
primary_extension: .diff
@@ -463,7 +479,7 @@ Emacs Lisp:
- elisp
- emacs
primary_extension: .el
filenames:
filenames:
- .emacs
extensions:
- .emacs
@@ -749,6 +765,8 @@ JSON:
- .sublime_session
- .sublime-settings
- .sublime-workspace
filenames:
- composer.lock
Jade:
group: HTML
@@ -946,15 +964,17 @@ Matlab:
Max:
type: programming
color: "#ce279c"
lexer: Text only
lexer: JSON
aliases:
- max/msp
- maxmsp
search_term: max/msp
primary_extension: .mxt
primary_extension: .maxpat
extensions:
- .maxhelp
- .maxpat
- .maxproj
- .mxt
- .pat
MiniD: # Legacy
searchable: false
@@ -995,6 +1015,12 @@ Nemerle:
color: "#0d3c6e"
primary_extension: .n
NetLogo:
type: programming
lexer: Common Lisp
color: "#ff2b2b"
primary_extension: .nlogo
Nginx:
type: markup
lexer: Nginx configuration file
@@ -1488,6 +1514,7 @@ Tcsh:
TeX:
type: markup
color: "#3D6117"
ace_mode: latex
aliases:
- latex
@@ -1544,11 +1571,25 @@ Unified Parallel C:
color: "#755223"
primary_extension: .upc
UnrealScript:
type: programming
color: "#a54c4d"
lexer: Java
primary_extension: .uc
VHDL:
type: programming
lexer: vhdl
color: "#543978"
primary_extension: .vhdl
extensions:
- .vhd
- .vhf
- .vhi
- .vho
- .vhs
- .vht
- .vhw
Vala:
type: programming
@@ -1620,6 +1661,7 @@ XML:
- .grxml
- .jelly
- .kml
- .launch
- .mxml
- .plist
- .pluginspec
@@ -1629,6 +1671,7 @@ XML:
- .rdf
- .rss
- .scxml
- .srdf
- .svg
- .tmCommand
- .tmLanguage
@@ -1637,12 +1680,14 @@ XML:
- .tmTheme
- .tml
- .ui
- .urdf
- .vxml
- .wsdl
- .wxi
- .wxl
- .wxs
- .x3d
- .xacro
- .xaml
- .xlf
- .xliff
@@ -1653,6 +1698,7 @@ XML:
filenames:
- .classpath
- .project
- phpunit.xml.dist
XProc:
type: programming
@@ -1694,6 +1740,7 @@ YAML:
primary_extension: .yml
extensions:
- .reek
- .rviz
- .yaml
eC:

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,8 @@
require 'yaml'
begin
require 'json'
rescue LoadError
require 'yaml'
end
require 'linguist/md5'
require 'linguist/classifier'
@@ -14,7 +18,8 @@ module Linguist
# Hash of serialized samples object
if File.exist?(PATH)
DATA = YAML.load_file(PATH)
serializer = defined?(JSON) ? JSON : YAML
DATA = serializer.load(File.read(PATH))
end
# Public: Iterate over each sample.

View File

@@ -139,7 +139,7 @@
# LICENSE, README, git config files
- ^COPYING$
- ^LICENSE$
- LICENSE$
- gitattributes$
- gitignore$
- gitmodules$