Compare commits

...

24 Commits

Author SHA1 Message Date
Joshua Peek
9e9500dfa9 Linguist 2.3.4 2012-09-24 10:54:17 -05:00
Joshua Peek
04cc100fba Rebuild samples db 2012-09-24 10:52:05 -05:00
Joshua Peek
31e33f99f2 Ensure lang is skipped on any binary file 2012-09-24 10:51:39 -05:00
Joshua Peek
7c51b90586 Skip empty sample 2012-09-24 10:50:49 -05:00
Joshua Peek
2b36f73da6 Some comments are triggering charlock binary 2012-09-24 10:48:22 -05:00
Joshua Peek
d96dd473b8 Rebuild samples db 2012-09-24 10:12:18 -05:00
Joshua Peek
f9066ffb7b Sort exts and filenames 2012-09-24 10:12:05 -05:00
Joshua Peek
945941d529 Update samples db 2012-09-24 10:07:58 -05:00
Joshua Peek
10e875e899 Print out samples db diffs 2012-09-24 10:07:08 -05:00
Justin Palmer
d24e5c938e sample directory needs uppercase E 2012-09-20 15:23:58 -07:00
Justin Palmer
aa069a336f add color to ecl language 2012-09-20 15:16:06 -07:00
Justin Palmer
662fc2ee9d Merge remote-tracking branch 'rengolin/ecl' 2012-09-20 15:07:41 -07:00
Ryan Tomayko
567cd6ef68 Merge pull request #250 from github/mac-format
Handle Mac Format when splitting lines
2012-09-11 14:17:21 -07:00
Ryan Tomayko
887a050db9 Only search the first 4K chars for \r 2012-09-10 01:56:08 -07:00
Ryan Tomayko
bda895eaae Test Mac Format detection and line splitting 2012-09-10 01:52:30 -07:00
Ryan Tomayko
2e49c06f47 Handle Mac Format when splitting lines 2012-09-10 01:05:48 -07:00
Joshua Peek
ae137847b4 Linguist 2.3.3 2012-09-04 09:32:21 -05:00
Scott J. Goldman
5443dc50a3 Merge pull request #247 from github/check-size-first
When testing if a blob is indexable or safe to colorize, check size first
2012-09-02 00:09:51 -07:00
Scott J. Goldman
fc435a2541 Linguist 2.3.2 2012-09-02 00:08:37 -07:00
Scott J. Goldman
04394750e7 When testing if a blob is safe to colorize, check size first
Similar to e415a13
2012-09-02 00:08:37 -07:00
Scott J. Goldman
e415a1351b When testing if a blob is indexable, check size first
Otherwise, charlock_holmes will allocate another large binary
buffer for testing the encoding, which is a problem if the binary
blob is many hundreds of MB large. It'll just fail and crash ruby.
2012-08-31 22:47:19 -07:00
Joshua Peek
6ec907a915 Merge pull request #245 from jcazevedo/master
Add Shell sample
2012-08-28 10:55:11 -07:00
Joao Azevedo
1f55f01fa9 Add Shell sample 2012-08-28 18:01:46 +01:00
Renato Golin
da6cf8dbb4 Add ECL programming language and test 2012-07-12 09:09:32 +01:00
16 changed files with 1002 additions and 154 deletions

View File

@@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = 'github-linguist'
s.version = '2.3.1'
s.version = '2.3.4'
s.summary = "GitHub Language detection"
s.authors = "GitHub"
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
s.add_dependency 'escape_utils', '~> 0.2.3'
s.add_dependency 'mime-types', '~> 1.19'
s.add_dependency 'pygments.rb', '>= 0.2.13'
s.add_development_dependency 'mocha'
s.add_development_dependency 'json'
s.add_development_dependency 'rake'
s.add_development_dependency 'yajl-ruby'

View File

@@ -160,7 +160,7 @@ module Linguist
#
# Return true or false
def safe_to_colorize?
text? && !large? && !high_ratio_of_long_lines?
!large? && text? && !high_ratio_of_long_lines?
end
# Internal: Does the blob have a ratio of long lines?
@@ -204,7 +204,31 @@ module Linguist
#
# Returns an Array of lines
def lines
@lines ||= (viewable? && data) ? data.split("\n", -1) : []
@lines ||=
if viewable? && data
data.split(line_split_character, -1)
else
[]
end
end
# Internal: Separator used when splitting the data into lines.
#
# The separator is "\r" when mac_format? reports Mac Format and "\n" in
# every other case. The result is memoized after the first call.
#
# Returns the separator String, either "\r" or "\n".
def line_split_character
  @line_split_character ||=
    if mac_format?
      "\r"
    else
      "\n"
    end
end
# Public: Is the data in Mac Format? This format uses \r (0x0d) characters
# for line ends and does not include a \n (0x0a).
#
# Only the first 4096 characters are searched for a \r. The first \r found
# decides the answer: it counts as a Mac line ending unless the character
# right after it is a \n.
#
# Returns true when Mac Format is detected and false otherwise, including
# when the blob is not viewable or has no data.
def mac_format?
  return false unless viewable?

  # Read data once and bail out when it is missing, so the predicate never
  # raises NoMethodError on a blob without content.
  content = data
  return false if content.nil?

  pos = content[0, 4096].index("\r")
  return false if pos.nil?

  # A \r at the very end of the data has no following character and still
  # counts as a Mac line ending.
  content[pos + 1] != ?\n
end
# Public: Get number of lines of code
@@ -250,7 +274,9 @@ module Linguist
#
# Return true or false
def indexable?
if binary?
if size > 100 * 1024
false
elsif binary?
false
elsif extname == '.txt'
true
@@ -260,8 +286,6 @@ module Linguist
false
elsif generated?
false
elsif size > 100 * 1024
false
else
true
end
@@ -278,7 +302,7 @@ module Linguist
if defined?(@data) && @data.is_a?(String)
data = @data
else
data = lambda { binary_mime_type? ? "" : self.data }
data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
end
@language = Language.detect(name.to_s, data, mode)

View File

@@ -84,7 +84,9 @@ module Linguist
if possible_languages.length > 1
data = data.call() if data.respond_to?(:call)
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
if data.nil? || data == ""
nil
elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
else

View File

@@ -367,6 +367,14 @@ Ecere Projects:
extensions:
- .epj
Ecl:
type: programming
color: "#8a1267"
primary_extension: .ecl
lexer: ECL
extensions:
- .eclxml
Eiffel:
type: programming
lexer: Text only

View File

@@ -17,11 +17,11 @@
".h"
],
"C++": [
".h",
".hpp",
".cu",
".cc",
".cpp",
".cc"
".cu",
".h",
".hpp"
],
"Ceylon": [
".ceylon"
@@ -41,6 +41,9 @@
"Diff": [
".patch"
],
"Ecl": [
".ecl"
],
"Emacs Lisp": [
".el"
],
@@ -48,11 +51,11 @@
".s"
],
"Gosu": [
".gs",
".gsp",
".gst",
".gsx",
".vark",
".gs"
".vark"
],
"Groovy": [
".gradle",
@@ -75,9 +78,9 @@
".script!"
],
"JSON": [
".json",
".maxhelp",
".maxpat",
".json"
".maxpat"
],
"Julia": [
".jl"
@@ -130,14 +133,14 @@
".pir"
],
"Perl": [
".pm",
".pl",
".t",
".script!"
".pm",
".script!",
".t"
],
"PHP": [
".php",
".module"
".module",
".php"
],
"PowerShell": [
".ps1",
@@ -154,17 +157,17 @@
".R"
],
"Racket": [
".script!",
".scrbl"
".scrbl",
".script!"
],
"Rebol": [
".r"
],
"Ruby": [
".rb",
".script!",
".rabl",
".rake"
".rake",
".rb",
".script!"
],
"Rust": [
".rs"
@@ -180,17 +183,17 @@
".sps"
],
"Scilab": [
".sci",
".sce",
".sci",
".tst"
],
"SCSS": [
".scss"
],
"Shell": [
".bash",
".script!",
".sh",
".bash",
".zsh"
],
"Standard ML": [
@@ -257,8 +260,8 @@
".gemrc"
]
},
"tokens_total": 269398,
"languages_total": 274,
"tokens_total": 271187,
"languages_total": 275,
"tokens": {
"Apex": {
"global": 70,
@@ -9157,6 +9160,84 @@
"d472341..8ad9ffb": 1,
"+": 3
},
"Ecl": {
"#option": 1,
"(": 32,
"true": 1,
")": 32,
";": 23,
"namesRecord": 4,
"RECORD": 1,
"string20": 1,
"surname": 1,
"string10": 2,
"forename": 1,
"integer2": 5,
"age": 2,
"dadAge": 1,
"mumAge": 1,
"END": 1,
"namesRecord2": 3,
"record": 1,
"extra": 1,
"end": 1,
"namesTable": 11,
"dataset": 2,
"FLAT": 2,
"namesTable2": 9,
"aveAgeL": 3,
"l": 1,
"l.dadAge": 1,
"+": 16,
"l.mumAge": 1,
"/2": 2,
"aveAgeR": 4,
"r": 1,
"r.dadAge": 1,
"r.mumAge": 1,
"output": 9,
"join": 11,
"left": 2,
"right": 3,
"//Several": 1,
"simple": 1,
"examples": 1,
"of": 1,
"sliding": 2,
"syntax": 1,
"left.age": 8,
"right.age": 12,
"-": 5,
"and": 10,
"<": 1,
"between": 7,
"//Same": 1,
"but": 1,
"on": 1,
"strings.": 1,
"Also": 1,
"includes": 1,
"to": 1,
"ensure": 1,
"sort": 1,
"is": 1,
"done": 1,
"by": 1,
"non": 1,
"before": 1,
"sliding.": 1,
"left.surname": 2,
"right.surname": 4,
"[": 4,
"]": 4,
"all": 1,
"//This": 1,
"should": 1,
"not": 1,
"generate": 1,
"a": 1,
"self": 1
},
"Emacs Lisp": {
"(": 1,
"print": 1,
@@ -25170,23 +25251,23 @@
"/": 2
},
"Shell": {
"SHEBANG#!bash": 5,
"echo": 20,
"SHEBANG#!bash": 6,
"echo": 52,
"export": 6,
"PATH": 5,
"pkgname": 1,
"stud": 4,
"-": 42,
"-": 182,
"git": 9,
"pkgver": 1,
"pkgrel": 1,
"pkgdesc": 1,
"arch": 1,
"(": 16,
"(": 89,
"i686": 1,
"x86_64": 1,
")": 19,
"url": 1,
")": 134,
"url": 4,
"license": 1,
"depends": 1,
"libev": 1,
@@ -25198,26 +25279,26 @@
"https": 1,
"//github.com/bumptech/stud.git": 1,
"_gitname": 1,
"build": 1,
"{": 5,
"build": 2,
"{": 49,
"cd": 4,
"msg": 4,
"if": 15,
"[": 24,
"d": 4,
"]": 24,
";": 16,
"then": 17,
"&&": 4,
"if": 27,
"[": 70,
"d": 7,
"]": 70,
";": 115,
"then": 29,
"&&": 54,
"pull": 3,
"origin": 1,
"else": 6,
"else": 10,
"clone": 2,
"fi": 15,
"fi": 24,
"rm": 2,
"rf": 1,
"make": 2,
"}": 5,
"}": 47,
"package": 1,
"PREFIX": 1,
"/usr": 1,
@@ -25225,21 +25306,21 @@
"install": 2,
"Dm755": 1,
"init.stud": 1,
"mkdir": 1,
"p": 1,
"#": 9,
"mkdir": 2,
"p": 2,
"#": 11,
"Bash": 1,
"script": 1,
"to": 4,
"the": 3,
"to": 15,
"the": 9,
"dotfile": 1,
"repository": 2,
"repository": 3,
"does": 1,
"a": 2,
"a": 7,
"lot": 1,
"of": 1,
"of": 2,
"fun": 2,
"stuff": 2,
"stuff": 3,
"like": 1,
"turning": 1,
"normal": 1,
@@ -25248,11 +25329,11 @@
".bashrc": 1,
"into": 1,
"symlinks": 1,
"this": 1,
"this": 4,
"away": 1,
"optionally": 1,
"moving": 1,
"old": 1,
"old": 4,
"files": 1,
"so": 1,
"that": 1,
@@ -25260,7 +25341,7 @@
"can": 1,
"be": 1,
"preserved": 1,
"setting": 1,
"setting": 2,
"up": 1,
"cron": 1,
"job": 1,
@@ -25271,38 +25352,38 @@
"some": 1,
"more": 1,
"shopt": 1,
"s": 2,
"s": 6,
"nocasematch": 1,
"This": 1,
"makes": 1,
"pattern": 1,
"matching": 1,
"case": 2,
"case": 8,
"insensitive": 1,
"POSTFIX": 1,
"URL": 1,
"PUSHURL": 1,
"overwrite": 3,
"true": 1,
"true": 2,
"print_help": 2,
"e": 2,
"exit": 7,
"for": 3,
"opt": 2,
"in": 4,
"@": 1,
"do": 3,
"e": 4,
"exit": 9,
"for": 5,
"opt": 3,
"in": 14,
"@": 3,
"do": 7,
"k": 1,
"|": 2,
"|": 14,
"keep": 1,
"local": 3,
"local": 22,
"false": 2,
"h": 1,
"help": 1,
"esac": 1,
"done": 3,
"f": 3,
".*": 1,
"h": 3,
"help": 3,
"esac": 6,
"done": 7,
"f": 11,
".*": 2,
"o": 3,
"continue": 1,
"mv": 1,
@@ -25314,35 +25395,296 @@
".jobs.cron": 1,
"source": 3,
"/.bashrc": 1,
"set": 2,
"n": 2,
"set": 9,
"n": 7,
"x": 1,
"unset": 3,
"unset": 6,
"system": 1,
"exec": 1,
"exec": 3,
"rbenv": 2,
"versions": 1,
"bare": 1,
"version": 1,
"z": 3,
"&": 4,
"version": 11,
"z": 4,
"&": 5,
"prefix": 1,
"/dev/null": 2,
"/dev/null": 6,
"rvm_ignore_rvmrc": 1,
"declare": 1,
"declare": 22,
"rvmrc": 3,
"rvm_rvmrc_files": 3,
"ef": 1,
"+": 1,
"GREP_OPTIONS": 1,
"grep": 1,
"printf": 1,
"grep": 6,
"printf": 4,
"rvm_path": 4,
"UID": 1,
"elif": 2,
"elif": 4,
"rvm_is_not_a_shell_function": 2,
"rvm_path/scripts": 1,
"rvm": 1,
"r": 15,
"sbt_release_version": 2,
"sbt_snapshot_version": 2,
"SNAPSHOT": 3,
"sbt_jar": 3,
"sbt_dir": 2,
"sbt_create": 2,
"sbt_snapshot": 1,
"sbt_launch_dir": 3,
"scala_version": 3,
"java_home": 1,
"sbt_explicit_version": 7,
"verbose": 6,
"debug": 11,
"quiet": 6,
"build_props_sbt": 3,
"project/build.properties": 9,
"versionLine": 2,
"sbt.version": 3,
"versionString": 3,
"versionLine##sbt.version": 1,
"update_build_props_sbt": 2,
"ver": 5,
"return": 3,
"perl": 3,
"pi": 1,
"q": 4,
"||": 12,
"Updated": 1,
"file": 3,
"Previous": 1,
"value": 1,
"was": 1,
"sbt_version": 8,
"v": 5,
"echoerr": 3,
"vlog": 1,
"dlog": 8,
"get_script_path": 2,
"path": 11,
"L": 1,
"target": 1,
"readlink": 1,
"get_mem_opts": 3,
"mem": 4,
"perm": 6,
"/": 2,
"<": 2,
"codecache": 1,
"die": 2,
"make_url": 3,
"groupid": 1,
"category": 1,
"default_jvm_opts": 1,
"default_sbt_opts": 1,
"default_sbt_mem": 2,
"noshare_opts": 1,
"sbt_opts_file": 1,
"jvm_opts_file": 1,
"latest_28": 1,
"latest_29": 1,
"latest_210": 1,
"script_path": 1,
"script_dir": 1,
"script_name": 2,
"java_cmd": 2,
"java": 2,
"sbt_mem": 5,
"residual_args": 4,
"java_args": 3,
"scalac_args": 4,
"sbt_commands": 2,
"build_props_scala": 1,
"build.scala.versions": 1,
"versionLine##build.scala.versions": 1,
"%": 3,
"execRunner": 2,
"arg": 3,
"sbt_groupid": 3,
"*": 11,
"org.scala": 4,
"tools.sbt": 3,
"sbt": 18,
"sbt_artifactory_list": 2,
"version0": 2,
"curl": 4,
"list": 1,
"only": 2,
"F": 1,
"pe": 1,
"make_release_url": 2,
"releases": 1,
"make_snapshot_url": 2,
"snapshots": 1,
"head": 1,
"jar_url": 1,
"jar_file": 1,
"download_url": 2,
"jar": 3,
"dirname": 1,
"which": 4,
"fail": 1,
"silent": 1,
"output": 1,
"wget": 2,
"O": 1,
"acquire_sbt_jar": 1,
"sbt_url": 1,
"usage": 2,
"cat": 3,
"<<": 2,
"EOM": 3,
"Usage": 1,
"options": 4,
"print": 1,
"message": 1,
"runner": 1,
"is": 5,
"chattier": 1,
"log": 2,
"level": 2,
"Debug": 1,
"Error": 1,
"no": 9,
"colors": 2,
"disable": 1,
"ANSI": 1,
"color": 1,
"codes": 1,
"create": 2,
"start": 1,
"even": 1,
"current": 1,
"directory": 3,
"contains": 2,
"project": 1,
"dir": 3,
"<path>": 3,
"global": 1,
"settings/plugins": 1,
"default": 4,
"/.sbt/": 1,
"<version>": 1,
"boot": 3,
"shared": 1,
"/.sbt/boot": 1,
"series": 1,
"ivy": 2,
"Ivy": 1,
"/.ivy2": 1,
"<integer>": 1,
"memory": 1,
"share": 2,
"use": 1,
"all": 1,
"caches": 1,
"sharing": 1,
"offline": 3,
"put": 1,
"mode": 2,
"jvm": 2,
"<port>": 1,
"Turn": 1,
"on": 2,
"JVM": 1,
"debugging": 1,
"open": 1,
"at": 1,
"given": 2,
"port.": 1,
"batch": 2,
"Disable": 1,
"interactive": 1,
"The": 1,
"way": 1,
"accomplish": 1,
"pre": 1,
"there": 2,
"build.properties": 1,
"an": 1,
"property": 1,
"update": 1,
"disk.": 1,
"That": 1,
"scalacOptions": 3,
"S": 2,
"stripped": 1,
"In": 1,
"duplicated": 1,
"or": 1,
"conflicting": 1,
"order": 1,
"above": 1,
"shows": 1,
"precedence": 1,
"JAVA_OPTS": 1,
"lowest": 1,
"command": 1,
"line": 1,
"highest.": 1,
"addJava": 9,
"addSbt": 12,
"addScalac": 2,
"addResidual": 2,
"addResolver": 1,
"addDebugger": 2,
"get_jvm_opts": 2,
"process_args": 2,
"require_arg": 12,
"type": 1,
"while": 2,
"gt": 1,
"shift": 28,
"integer": 1,
"inc": 1,
"port": 1,
"snapshot": 1,
"launch": 1,
"scala": 3,
"home": 2,
"D*": 1,
"J*": 1,
"S*": 1,
"sbtargs": 3,
"IFS": 1,
"read": 1,
"<\"$sbt_opts_file\">": 1,
"process": 1,
"combined": 1,
"args": 2,
"reset": 1,
"residuals": 1,
"argumentCount=": 1,
"we": 1,
"were": 1,
"any": 1,
"opts": 1,
"eq": 1,
"0": 1,
"ThisBuild": 1,
"Update": 1,
"properties": 1,
"disk": 1,
"explicit": 1,
"gives": 1,
"us": 1,
"choice": 1,
"Detected": 1,
"Overriding": 1,
"alert": 1,
"them": 1,
"here": 1,
"argumentCount": 1,
"./build.sbt": 1,
"./project": 1,
"pwd": 1,
"doesn": 1,
"t": 1,
"understand": 1,
"iflast": 1,
"#residual_args": 1,
"SHEBANG#!sh": 2,
"SHEBANG#!zsh": 2
},
@@ -27410,6 +27752,7 @@
"Dart": 68,
"Delphi": 30,
"Diff": 16,
"Ecl": 281,
"Emacs Lisp": 3,
"GAS": 133,
"Gosu": 413,
@@ -27452,7 +27795,7 @@
"Scheme": 3478,
"Scilab": 69,
"SCSS": 39,
"Shell": 500,
"Shell": 2008,
"Standard ML": 243,
"SuperCollider": 135,
"Tea": 3,
@@ -27480,6 +27823,7 @@
"Dart": 1,
"Delphi": 1,
"Diff": 1,
"Ecl": 1,
"Emacs Lisp": 1,
"GAS": 1,
"Gosu": 5,
@@ -27500,7 +27844,7 @@
"Nemerle": 1,
"Nimrod": 1,
"Nu": 1,
"Objective-C": 20,
"Objective-C": 19,
"OCaml": 1,
"Opa": 2,
"OpenCL": 1,
@@ -27522,7 +27866,7 @@
"Scheme": 1,
"Scilab": 3,
"SCSS": 1,
"Shell": 15,
"Shell": 16,
"Standard ML": 2,
"SuperCollider": 1,
"Tea": 1,
@@ -27537,5 +27881,5 @@
"XSLT": 1,
"YAML": 1
},
"md5": "ca1d189ca79aa9c52370da78f2c2b4dd"
"md5": "8591cfa68ab6fe3b3dacbcb885be70d0"
}

View File

@@ -76,12 +76,14 @@ module Linguist
db['extnames'][language_name] ||= []
if !db['extnames'][language_name].include?(sample[:extname])
db['extnames'][language_name] << sample[:extname]
db['extnames'][language_name].sort!
end
end
if sample[:filename]
db['filenames'][language_name] ||= []
db['filenames'][language_name] << sample[:filename]
db['filenames'][language_name].sort!
end
data = File.read(sample[:path])

View File

@@ -1,13 +1,3 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(** This file is deprecated, for a tree on list, use [Mergesort.v]. *)
(** A development of Treesort on Heap trees. It has an average
complexity of O(n.log n) but of O() in the worst case (e.g. if
the list is already sorted) *)
@@ -88,9 +78,9 @@ Section defs.
forall P:Tree -> Type,
P Tree_Leaf ->
(forall (a:A) (T1 T2:Tree),
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
forall T:Tree, is_heap T -> P T.
Proof.
simple induction T; auto with datatypes.
@@ -105,9 +95,9 @@ Section defs.
forall P:Tree -> Set,
P Tree_Leaf ->
(forall (a:A) (T1 T2:Tree),
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
forall T:Tree, is_heap T -> P T.
Proof.
simple induction T; auto with datatypes.
@@ -135,13 +125,13 @@ Section defs.
(forall a, HdRel leA a l1 -> HdRel leA a l2 -> HdRel leA a l) ->
merge_lem l1 l2.
Require Import Morphisms.
Instance: Equivalence (@meq A).
Proof. constructor; auto with datatypes. red. apply meq_trans. Defined.
Instance: Proper (@meq A ++> @meq _ ++> @meq _) (@munion A).
Proof. intros x y H x' y' H'. now apply meq_congr. Qed.
Lemma merge :
forall l1:list A, Sorted leA l1 ->
forall l2:list A, Sorted leA l2 -> merge_lem l1 l2.
@@ -150,8 +140,8 @@ Section defs.
apply merge_exist with l2; auto with datatypes.
rename l1 into l.
revert l2 H0. fix 1. intros.
destruct l2 as [|a0 l0].
apply merge_exist with (a :: l); simpl; auto with datatypes.
destruct l2 as [|a0 l0].
apply merge_exist with (a :: l); simpl; auto with datatypes.
elim (leA_dec a a0); intros.
(* 1 (leA a a0) *)
@@ -159,18 +149,18 @@ Section defs.
destruct (merge l H (a0 :: l0) H0).
apply merge_exist with (a :: l1). clear merge merge0.
auto using cons_sort, cons_leA with datatypes.
simpl. rewrite m. now rewrite munion_ass.
intros. apply cons_leA.
simpl. rewrite m. now rewrite munion_ass.
intros. apply cons_leA.
apply (@HdRel_inv _ leA) with l; trivial with datatypes.
(* 2 (leA a0 a) *)
apply Sorted_inv in H0. destruct H0.
destruct (merge0 l0 H0). clear merge merge0.
apply merge_exist with (a0 :: l1);
destruct (merge0 l0 H0). clear merge merge0.
apply merge_exist with (a0 :: l1);
auto using cons_sort, cons_leA with datatypes.
simpl; rewrite m. simpl. setoid_rewrite munion_ass at 1. rewrite munion_comm.
repeat rewrite munion_ass. setoid_rewrite munion_comm at 3. reflexivity.
intros. apply cons_leA.
intros. apply cons_leA.
apply (@HdRel_inv _ leA) with l0; trivial with datatypes.
Qed.
@@ -186,7 +176,7 @@ Section defs.
match t with
| Tree_Leaf => emptyBag
| Tree_Node a t1 t2 =>
munion (contents t1) (munion (contents t2) (singletonBag a))
munion (contents t1) (munion (contents t2) (singletonBag a))
end.
@@ -272,11 +262,11 @@ Section defs.
apply flat_exist with (a :: l); simpl; auto with datatypes.
apply meq_trans with
(munion (list_contents _ eqA_dec l1)
(munion (list_contents _ eqA_dec l2) (singletonBag a))).
(munion (list_contents _ eqA_dec l2) (singletonBag a))).
apply meq_congr; auto with datatypes.
apply meq_trans with
(munion (singletonBag a)
(munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
(munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
apply munion_rotate.
apply meq_right; apply meq_sym; trivial with datatypes.
Qed.

View File

@@ -1,11 +1,3 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
Require Import Omega Relations Multiset SetoidList.
(** This file is deprecated, use [Permutation.v] instead.
@@ -154,7 +146,7 @@ Lemma permut_add_cons_inside :
Proof.
intros;
replace (a :: l) with ([] ++ a :: l); trivial;
apply permut_add_inside; trivial.
apply permut_add_inside; trivial.
Qed.
Lemma permut_middle :
@@ -168,8 +160,8 @@ Lemma permut_sym_app :
Proof.
intros l1 l2;
unfold permutation, meq;
intro a; do 2 rewrite list_contents_app; simpl;
auto with arith.
intro a; do 2 rewrite list_contents_app; simpl;
auto with arith.
Qed.
Lemma permut_rev :

View File

@@ -1,17 +1,5 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(*********************************************************************)
(** * List permutations as a composition of adjacent transpositions *)
(*********************************************************************)
(* Adapted in May 2006 by Jean-Marc Notin from initial contents by
Laurent Théry (Huffmann contribution, October 2003) *)
Laurent Thery (Huffmann contribution, October 2003) *)
Require Import List Setoid Compare_dec Morphisms.
Import ListNotations. (* For notations [] and [a;b;c] *)

View File

@@ -1,10 +1,2 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
Require Export Sorted.
Require Export Mergesort.

42
samples/Ecl/sample.ecl Normal file
View File

@@ -0,0 +1,42 @@
/*
* Multi-line comment
*/
#option ('slidingJoins', true);
namesRecord :=
RECORD
string20 surname;
string10 forename;
integer2 age;
integer2 dadAge;
integer2 mumAge;
END;
namesRecord2 :=
record
string10 extra;
namesRecord;
end;
namesTable := dataset('x',namesRecord,FLAT);
namesTable2 := dataset('y',namesRecord2,FLAT);
integer2 aveAgeL(namesRecord l) := (l.dadAge+l.mumAge)/2;
integer2 aveAgeR(namesRecord2 r) := (r.dadAge+r.mumAge)/2;
// Standard join on a function of left and right
output(join(namesTable, namesTable2, aveAgeL(left) = aveAgeR(right)));
//Several simple examples of sliding join syntax
output(join(namesTable, namesTable2, left.age >= right.age - 10 and left.age <= right.age +10));
output(join(namesTable, namesTable2, left.age between right.age - 10 and right.age +10));
output(join(namesTable, namesTable2, left.age between right.age + 10 and right.age +30));
output(join(namesTable, namesTable2, left.age between (right.age + 20) - 10 and (right.age +20) + 10));
output(join(namesTable, namesTable2, aveAgeL(left) between aveAgeR(right)+10 and aveAgeR(right)+40));
//Same, but on strings. Also includes age to ensure sort is done by non-sliding before sliding.
output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age));
output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age,all));
//This should not generate a self join
output(join(namesTable, namesTable, left.age between right.age - 10 and right.age +10));

432
samples/Shell/sbt.script! Executable file
View File

@@ -0,0 +1,432 @@
#!/usr/bin/env bash
#
# A more capable sbt runner, coincidentally also called sbt.
# Author: Paul Phillips <paulp@typesafe.com>
# todo - make this dynamic
declare -r sbt_release_version=0.11.3
declare -r sbt_snapshot_version=0.13.0-SNAPSHOT
unset sbt_jar sbt_dir sbt_create sbt_snapshot sbt_launch_dir
unset scala_version java_home sbt_explicit_version
unset verbose debug quiet
# Print the sbt version recorded in project/build.properties, if any.
#
# Looks for a line starting with "sbt.version" in project/build.properties
# (relative to the current directory) and prints what follows
# "sbt.version=". Prints an empty line when the file exists but has no such
# entry, and prints nothing when the file does not exist.
build_props_sbt () {
  # Scratch variables stay local so they do not leak into the caller's scope.
  local versionLine versionString

  if [[ -f project/build.properties ]]; then
    versionLine=$(grep ^sbt.version project/build.properties)
    versionString=${versionLine##sbt.version=}
    echo "$versionString"
  fi
}
# Rewrite project/build.properties so that it records the given sbt version.
#
# $1 - the sbt version to record.
#
# Does nothing when the version reported by build_props_sbt already equals
# $1, or when project/build.properties does not exist. Otherwise it replaces
# the existing "sbt.version=" line in place with perl, appends such a line if
# the file still has none, and prints a notice with the new and old values.
update_build_props_sbt () {
  local ver="$1"
  local old=$(build_props_sbt)

  # The right operand is quoted on purpose: inside [[ ]] an unquoted right
  # side of == is matched as a glob pattern instead of compared literally.
  if [[ $ver == "$old" ]]; then
    return
  elif [[ -f project/build.properties ]]; then
    perl -pi -e "s/^sbt\.version=.*\$/sbt.version=${ver}/" project/build.properties
    # Append the setting when the substitution found nothing to replace.
    grep -q '^sbt.version=' project/build.properties || echo "sbt.version=${ver}" >> project/build.properties

    echo !!!
    echo !!! Updated file project/build.properties setting sbt.version to: $ver
    echo !!! Previous value was: $old
    echo !!!
  fi
}
# Print the sbt version this runner should use.
#
# Order of preference: $sbt_explicit_version when set, then the version
# reported by build_props_sbt, then $sbt_release_version.
sbt_version () {
  if [[ -n $sbt_explicit_version ]]; then
    echo $sbt_explicit_version
    return
  fi

  local from_props=$(build_props_sbt)
  if [[ -z $from_props ]]; then
    echo $sbt_release_version
  else
    echo $from_props
  fi
}
# Print all arguments on standard error instead of standard output.
echoerr () {
  echo "$@" >&2
}
# Forward the arguments to echoerr when $verbose or $debug is non-empty.
# Returns status 1 without printing anything when both are empty.
vlog () {
  if [[ -z $verbose && -z $debug ]]; then
    return 1
  fi
  echoerr "$@"
}
# Forward the arguments to echoerr when $debug is non-empty.
# Returns status 1 without printing anything when it is empty.
dlog () {
  if [[ -z $debug ]]; then
    return 1
  fi
  echoerr "$@"
}
# Resolve one level of symlink for the given path and print the result.
#
# $1 - path to inspect.
#
# Prints $1 unchanged when it is not a symlink. Otherwise prints the link
# target: as-is when the target is absolute, or joined onto the directory of
# $1 when it is relative.
#
# this seems to cover the bases on OSX, and someone will
# have to tell me about the others.
get_script_path () {
  local path="$1"
  [[ -L "$path" ]] || { echo "$path" ; return; }

  local target=$(readlink "$path")
  if [[ "${target:0:1}" == "/" ]]; then
    echo "$target"
  else
    # $path is quoted so a directory name containing spaces reaches dirname
    # as a single argument.
    echo "$(dirname "$path")/$target"
  fi
}
# Derive a consistent set of JVM memory flags from a single heap size so the
# individual settings do not have to be tuned one by one.
#
# $1 - heap size in megabytes (defaults to 1536).
#
# The permgen size is a quarter of the heap, kept between 256 and 1024; the
# code cache is half of the permgen size. Prints the four flags on one line.
get_mem_opts () {
  local mem=${1:-1536}
  local perm=$(( $mem / 4 ))

  if (( $perm <= 256 )); then
    perm=256
  fi
  if (( $perm >= 1024 )); then
    perm=1024
  fi

  local codecache=$(( $perm / 2 ))
  echo "-Xms${mem}m -Xmx${mem}m -XX:MaxPermSize=${perm}m -XX:ReservedCodeCacheSize=${codecache}m"
}
# Print "Aborting: " followed by the arguments, then exit the shell with
# status 1.
die () {
  local message="Aborting: $*"
  echo "$message"
  exit 1
}
# Print the download URL of an sbt launcher jar.
#
# $1 - group id used in the repository path
# $2 - repository category; appended to "ivy-" in the URL
# $3 - launcher version
make_url () {
  # Declared local so calling make_url does not overwrite global variables
  # named groupid, category or version.
  local groupid="$1"
  local category="$2"
  local version="$3"

  echo "http://typesafe.artifactoryonline.com/typesafe/ivy-$category/$groupid/sbt-launch/$version/sbt-launch.jar"
}
# Read-only defaults used throughout the runner.
declare -r default_jvm_opts="-Dfile.encoding=UTF8"
declare -r default_sbt_opts="-XX:+CMSClassUnloadingEnabled"
declare -r default_sbt_mem=1536
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r jvm_opts_file=".jvmopts"
declare -r latest_28="2.8.2"
declare -r latest_29="2.9.1"
declare -r latest_210="2.10.0-SNAPSHOT"

# Location of this script. $script_path is quoted inside dirname/basename so
# a location containing spaces is passed as a single argument.
declare -r script_path=$(get_script_path "$BASH_SOURCE")
declare -r script_dir="$(dirname "$script_path")"
declare -r script_name="$(basename "$script_path")"

# some non-read-onlies set with defaults
declare java_cmd=java
declare sbt_launch_dir="$script_dir/.lib"
declare sbt_mem=$default_sbt_mem

# pull -J and -D options to give to java.
declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
# Print the Scala version setting recorded in project/build.properties, if any.
#
# Looks for a line starting with "build.scala.versions" in
# project/build.properties (relative to the current directory), strips the
# "build.scala.versions=" prefix, and prints the rest with everything from
# the first " ." onwards removed. Prints nothing when the file is missing.
build_props_scala () {
  # Scratch variables stay local so they do not leak into the caller's scope.
  local versionLine versionString

  if [[ -f project/build.properties ]]; then
    versionLine=$(grep ^build.scala.versions project/build.properties)
    versionString=${versionLine##build.scala.versions=}
    echo ${versionString%% .*}
  fi
}
# Replace the current shell with the given command line.
#
# When $verbose or $debug is non-empty, the command is first echoed one
# argument per line, with any argument that contains a space wrapped in
# double quotes, followed by an empty line.
execRunner () {
  if [[ -n $verbose || -n $debug ]]; then
    echo "# Executing command line:"
    for arg; do
      case "$arg" in
        *' '*) printf "\"%s\"\n" "$arg" ;;
        *)     printf "%s\n" "$arg" ;;
      esac
    done
    echo ""
  fi

  exec "$@"
}
# Print the group id under which the selected sbt version is published:
# org.scala-tools.sbt for 0.7.x, 0.10.x, 0.11.1 and 0.11.2, and
# org.scala-sbt for every other version.
sbt_groupid () {
  local current=$(sbt_version)

  case $current in
    0.7.*|0.10.*|0.11.[12]) echo org.scala-tools.sbt ;;
    *)                      echo org.scala-sbt ;;
  esac
}
# Print candidate snapshot directory names for the selected sbt version.
#
# Fetches the listing of the ivy-snapshots sbt-launch directory with curl,
# keeps the lines that contain the version (with a trailing "-SNAPSHOT"
# removed), reverses their order, and reduces each '<a href="...' line to the
# href value up to the first '"' or '/'.
# NOTE(review): presumably the reversal puts the newest build first; that
# depends on the order the server lists entries in - confirm.
sbt_artifactory_list () {
local version0=$(sbt_version)
local version=${version0%-SNAPSHOT}
local url="http://typesafe.artifactoryonline.com/typesafe/ivy-snapshots/$(sbt_groupid)/sbt-launch/"
dlog "Looking for snapshot list at: $url "
# NOTE(review): $version is unquoted in the grep below; fine for plain
# version strings, but it would word-split if it ever held whitespace.
curl -s --list-only "$url" | \
grep -F $version | \
perl -e 'print reverse <>' | \
perl -pe 's#^<a href="([^"/]+).*#$1#;'
}
# Print the release-repository URL of the launcher jar for the selected sbt
# version and its group id.
make_release_url () {
  local group=$(sbt_groupid)
  local release=$(sbt_version)
  make_url $group releases $release
}
# argument is e.g. 0.13.0-SNAPSHOT
# finds the actual version (with the build id) at artifactory
#
# Takes the first entry printed by sbt_artifactory_list, builds its snapshot
# URL with make_url, issues a HEAD request for it with curl, prints the URL
# and returns. Prints nothing when the list is empty.
# NOTE(review): curl's exit status is only passed to dlog and never checked,
# so the first candidate is printed even if the request failed - confirm
# whether later candidates were meant to be tried.
make_snapshot_url () {
for ver in $(sbt_artifactory_list); do
local url=$(make_url $(sbt_groupid) snapshots $ver)
dlog "Testing $url"
curl -s --head "$url" >/dev/null
dlog "curl returned: $?"
echo "$url"
return
done
}
# Print the URL from which the launcher jar for the selected sbt version
# should be downloaded: a fixed googlecode URL for 0.7.x, the output of
# make_snapshot_url for versions ending in -SNAPSHOT, and the output of
# make_release_url otherwise.
jar_url () {
  local wanted=$(sbt_version)

  if [[ $wanted == 0.7.* ]]; then
    echo "http://simple-build-tool.googlecode.com/files/sbt-launch-0.7.7.jar"
  elif [[ $wanted == *-SNAPSHOT ]]; then
    make_snapshot_url
  else
    make_release_url
  fi
}
# Print the local path of the launcher jar for the given version.
#
# $1 - version; used as the directory name under $sbt_launch_dir.
jar_file () {
  local version_dir="$1"
  echo "${sbt_launch_dir}/${version_dir}/sbt-launch.jar"
}
# Download the sbt launcher jar.
#
# $1 - URL to fetch
# $2 - destination path of the jar
#
# Creates the destination directory, then fetches the URL with curl, or with
# wget when curl is not found. Succeeds only if the destination file exists
# afterwards.
download_url () {
  local url="$1"
  local jar="$2"

  echo "Downloading sbt launcher $(sbt_version):"
  echo " From $url"
  echo " To $jar"

  # The dirname result is quoted so a destination containing spaces creates
  # one directory instead of several.
  mkdir -p "$(dirname "$jar")" && {
    if which curl >/dev/null; then
      curl --fail --silent "$url" --output "$jar"
    elif which wget >/dev/null; then
      wget --quiet -O "$jar" "$url"
    fi
  } && [[ -f "$jar" ]]
}
# Make sure the launcher jar for the selected sbt version is present.
#
# Sets the globals sbt_url (from jar_url) and sbt_jar (from jar_file), and
# calls download_url only when no file exists at $sbt_jar yet.
acquire_sbt_jar () {
  sbt_url="$(jar_url)"
  sbt_jar="$(jar_file $(sbt_version))"

  if [[ ! -f "$sbt_jar" ]]; then
    download_url "$sbt_url" "$sbt_jar"
  fi
}
# Print the command-line help text on standard output.
#
# The text is an unquoted here-document, so the $variables and $(...)
# substitutions inside it are expanded each time usage runs; this includes
# running get_mem_opts and `java -version`. Lines starting with "#" inside
# the here-document are part of the printed text, not shell comments.
# NOTE(review): `|&` is bash 4 shorthand for `2>&1 |` - confirm the minimum
# bash version this script is expected to run under.
usage () {
cat <<EOM
Usage: $script_name [options]
-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to Debug
-q | -quiet set sbt log level to Error
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt/<version>)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_mem, which is
$(get_mem_opts $sbt_mem) )
-no-share use all local caches; no sharing
-offline put sbt in offline mode
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode
# sbt version (default: from project/build.properties if present, else latest release)
!!! The only way to accomplish this pre-0.12.0 if there is a build.properties file which
!!! contains an sbt.version property is to update the file on disk. That's what this does.
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-snapshot use a snapshot version of sbt
-sbt-launch-dir <path> directory to hold sbt launchers (default: $sbt_launch_dir)
# scala version (default: as chosen by sbt)
-28 use $latest_28
-29 use $latest_29
-210 use $latest_210
-scala-home <path> use the scala build at the specified directory
-scala-version <version> use the specified version of scala
# java version (default: java from PATH, currently $(java -version |& grep version))
-java-home <path> alternate JAVA_HOME
# jvm options and output control
JAVA_OPTS environment variable holding jvm args, if unset uses "$default_jvm_opts"
SBT_OPTS environment variable holding jvm args, if unset uses "$default_sbt_opts"
.jvmopts if file is in sbt root, it is prepended to the args given to the jvm
.sbtopts if file is in sbt root, it is prepended to the args given to **sbt**
-Dkey=val pass -Dkey=val directly to the jvm
-J-X pass option -X directly to the jvm (-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}
# Append one argument to the java_args array, logging it through dlog first.
addJava () {
  dlog "[addJava] arg = '$1'"
  java_args+=( "$1" )
}
# Append one command to the sbt_commands array, logging it through dlog first.
addSbt () {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=( "$1" )
}
# Append one option to the scalac_args array, logging it through dlog first.
addScalac () {
  dlog "[addScalac] arg = '$1'"
  scalac_args+=( "$1" )
}
# Append one argument to the residual_args array, logging it through dlog
# first.
addResidual () {
  dlog "[residual] arg = '$1'"
  residual_args+=( "$1" )
}
# Queue an sbt command that adds the given resolver expression to the
# build-wide resolvers setting.
addResolver () {
  local command="set resolvers in ThisBuild += $1"
  addSbt "$command"
}
# Queue the JVM remote-debugging flags for the given port. Both flags are
# handed to addJava together as one string.
#
# $1 - port used as the jdwp address.
addDebugger () {
  local flags="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1"
  addJava "$flags"
}
# Print the contents of $jvm_opts_file when that file exists; otherwise print
# nothing and return a non-zero status.
get_jvm_opts () {
  # Disabled in the original script, kept for reference:
  # echo "${JAVA_OPTS:-$default_jvm_opts}"
  # echo "${SBT_OPTS:-$default_sbt_opts}"
  [[ -f "$jvm_opts_file" ]] || return 1
  cat "$jvm_opts_file"
}
# Parse the runner's command-line arguments.
#
# Recognized options either set a variable (verbose, debug, quiet, sbt_mem,
# sbt_dir, sbt_create, sbt_jar, sbt_explicit_version, sbt_launch_dir,
# java_cmd) or queue something through addJava / addSbt / addScalac.
# Everything else is collected through addResidual. -h / -help prints the
# usage text and exits with status 1.
#
# After all arguments are consumed, a log-level command is queued when debug
# or quiet was requested; which command depends on the selected sbt version.
process_args ()
{
# require_arg <type> <option> <value>: abort through die when the value is
# empty or starts with "-", i.e. looks like the next option rather than a value.
require_arg () {
local type="$1"
local opt="$2"
local arg="$3"
if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
die "$opt requires <$type> argument"
fi
}
# Consume the arguments left to right; options that take a value shift twice.
while [[ $# -gt 0 ]]; do
case "$1" in
-h|-help) usage; exit 1 ;;
-v|-verbose) verbose=1 && shift ;;
-d|-debug) debug=1 && shift ;;
-q|-quiet) quiet=1 && shift ;;
-ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
-mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
-no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
-no-share) addJava "$noshare_opts" && shift ;;
-sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
-sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;;
-debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-offline) addSbt "set offline := true" && shift ;;
-jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
-batch) exec </dev/null && shift ;;
-sbt-create) sbt_create=true && shift ;;
-sbt-snapshot) sbt_explicit_version=$sbt_snapshot_version && shift ;;
-sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
-sbt-version) require_arg version "$1" "$2" && sbt_explicit_version="$2" && shift 2 ;;
-sbt-launch-dir) require_arg path "$1" "$2" && sbt_launch_dir="$2" && shift 2 ;;
-scala-version) require_arg version "$1" "$2" && addSbt "set scalaVersion := \"$2\"" && shift 2 ;;
-scala-home) require_arg path "$1" "$2" && addSbt "set scalaHome in ThisBuild := Some(file(\"$2\"))" && shift 2 ;;
-java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && shift 2 ;;
-D*) addJava "$1" && shift ;;
-J*) addJava "${1:2}" && shift ;;
-S*) addScalac "${1:2}" && shift ;;
-28) addSbt "++ $latest_28" && shift ;;
-29) addSbt "++ $latest_29" && shift ;;
-210) addSbt "++ $latest_210" && shift ;;
*) addResidual "$1" && shift ;;
esac
done
# Debug logging: 0.7.x gets the plain "debug" command, other versions get a
# logLevel setting.
[[ $debug ]] && {
case $(sbt_version) in
0.7.*) addSbt "debug" ;;
*) addSbt "set logLevel in Global := Level.Debug" ;;
esac
}
# Quiet logging: nothing is queued for 0.7.x.
[[ $quiet ]] && {
case $(sbt_version) in
0.7.*) ;;
*) addSbt "set logLevel in Global := Level.Error" ;;
esac
}
}
# if .sbtopts exists, prepend its contents to $@ so it can be processed by this runner
[[ -f "$sbt_opts_file" ]] && {
sbtargs=()
while IFS= read -r arg; do
sbtargs=( "${sbtargs[@]}" "$arg" )
done <"$sbt_opts_file"
set -- "${sbtargs[@]}" "$@"
}
# process the combined args, then reset "$@" to the residuals
process_args "$@"
set -- "${residual_args[@]}"
argumentCount=$#
# set scalacOptions if we were given any -S opts
[[ ${#scalac_args[@]} -eq 0 ]] || addSbt "set scalacOptions in ThisBuild += \"${scalac_args[@]}\""
# Update build.properties on disk to set explicit version - sbt gives us no choice
[[ -n "$sbt_explicit_version" ]] && update_build_props_sbt "$sbt_explicit_version"
echo "Detected sbt version $(sbt_version)"
[[ -n "$scala_version" ]] && echo "Overriding scala version to $scala_version"
# no args - alert them there's stuff in here
(( $argumentCount > 0 )) || echo "Starting $script_name: invoke with -help for other options"
# verify this is an sbt dir or -create was given
[[ -f ./build.sbt || -d ./project || -n "$sbt_create" ]] || {
cat <<EOM
$(pwd) doesn't appear to be an sbt project.
If you want to start sbt anyway, run:
$0 -sbt-create
EOM
exit 1
}
# pick up completion if present; todo
[[ -f .sbt_completion.sh ]] && source .sbt_completion.sh
# no jar? download it.
[[ -f "$sbt_jar" ]] || acquire_sbt_jar || {
# still no jar? uh-oh.
echo "Download failed. Obtain the jar manually and place it at $sbt_jar"
exit 1
}
[[ -n "$sbt_dir" ]] || {
sbt_dir=~/.sbt/$(sbt_version)
addJava "-Dsbt.global.base=$sbt_dir"
echo "Using $sbt_dir as sbt dir, -sbt-dir to override."
}
# since sbt 0.7 doesn't understand iflast
(( ${#residual_args[@]} == 0 )) && residual_args=( "shell" )
# run sbt
execRunner "$java_cmd" \
$(get_mem_opts $sbt_mem) \
$(get_jvm_opts) \
${java_args[@]} \
-jar "$sbt_jar" \
"${sbt_commands[@]}" \
"${residual_args[@]}"

1
samples/Text/mac.txt Normal file
View File

@@ -0,0 +1 @@
line 1

View File

@@ -2,6 +2,7 @@ require 'linguist/file_blob'
require 'linguist/samples'
require 'test/unit'
require 'mocha'
require 'mime/types'
require 'pygments'
@@ -64,6 +65,14 @@ class TestBlob < Test::Unit::TestCase
assert_equal ["module Foo", "end", ""], blob("Ruby/foo.rb").lines
end
# The Text/mac.txt fixture is expected to be detected as Mac Format.
def test_mac_format
  mac_blob = blob("Text/mac.txt")
  assert mac_blob.mac_format?
end
# The Text/mac.txt fixture is expected to split into its two lines plus the
# empty string that follows the final line ending.
def test_lines_mac_format
  expected = ["line 1", "line 2", ""]
  assert_equal expected, blob("Text/mac.txt").lines
end
def test_size
assert_equal 15, blob("Ruby/foo.rb").size
end
@@ -261,6 +270,12 @@ class TestBlob < Test::Unit::TestCase
assert !blob("Text/dump.sql").indexable?
assert !blob("Binary/github.po").indexable?
assert !blob("Binary/linguist.gem").indexable?
# large binary blobs should fail on size check first, not call
# into charlock_holmes and alloc big buffers for testing encoding
b = blob("Binary/octocat.ai")
b.expects(:binary?).never
assert !b.indexable?
end
def test_language

View File

@@ -1,4 +1,6 @@
require 'linguist/samples'
require 'tempfile'
require 'yajl'
require 'test/unit'
@@ -12,6 +14,19 @@ class TestSamples < Test::Unit::TestCase
# Just warn, it shouldn't scare people off by breaking the build.
if serialized['md5'] != latest['md5']
warn "Samples database is out of date. Run `bundle exec rake samples`."
expected = Tempfile.new('expected.json')
expected.write Yajl::Encoder.encode(serialized, :pretty => true)
expected.close
actual = Tempfile.new('actual.json')
actual.write Yajl::Encoder.encode(latest, :pretty => true)
actual.close
warn `diff #{expected.path} #{actual.path}`
expected.unlink
actual.unlink
end
end