Compare commits

..

17 Commits

Author SHA1 Message Date
Joshua Peek
9e9500dfa9 Linguist 2.3.4 2012-09-24 10:54:17 -05:00
Joshua Peek
04cc100fba Rebuild samples db 2012-09-24 10:52:05 -05:00
Joshua Peek
31e33f99f2 Ensure lang is skipped on any binary file 2012-09-24 10:51:39 -05:00
Joshua Peek
7c51b90586 Skip empty sample 2012-09-24 10:50:49 -05:00
Joshua Peek
2b36f73da6 Some comments are triggering charlock binary 2012-09-24 10:48:22 -05:00
Joshua Peek
d96dd473b8 Rebuild samples db 2012-09-24 10:12:18 -05:00
Joshua Peek
f9066ffb7b Sort exts and filenames 2012-09-24 10:12:05 -05:00
Joshua Peek
945941d529 Update samples db 2012-09-24 10:07:58 -05:00
Joshua Peek
10e875e899 Print out samples db diffs 2012-09-24 10:07:08 -05:00
Justin Palmer
d24e5c938e sample directory needs uppercase E 2012-09-20 15:23:58 -07:00
Justin Palmer
aa069a336f add color to ecl language 2012-09-20 15:16:06 -07:00
Justin Palmer
662fc2ee9d Merge remote-tracking branch 'rengolin/ecl' 2012-09-20 15:07:41 -07:00
Ryan Tomayko
567cd6ef68 Merge pull request #250 from github/mac-format
Handle Mac Format when splitting lines
2012-09-11 14:17:21 -07:00
Ryan Tomayko
887a050db9 Only search the first 4K chars for \r 2012-09-10 01:56:08 -07:00
Ryan Tomayko
bda895eaae Test Mac Format detection and line splitting 2012-09-10 01:52:30 -07:00
Ryan Tomayko
2e49c06f47 Handle Mac Format when splitting lines 2012-09-10 01:05:48 -07:00
Renato Golin
da6cf8dbb4 Add ECL programming language and test 2012-07-12 09:09:32 +01:00
15 changed files with 234 additions and 87 deletions

View File

@@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = 'github-linguist'
s.version = '2.3.3'
s.version = '2.3.4'
s.summary = "GitHub Language detection"
s.authors = "GitHub"

View File

@@ -204,7 +204,31 @@ module Linguist
#
# Returns an Array of lines
def lines
  # Memoized so the data is split at most once per blob.
  @lines ||=
    if viewable? && data
      # Split on the detected line terminator rather than a hard-coded "\n",
      # so data that uses bare "\r" line ends is not returned as one giant
      # line. The -1 limit keeps trailing empty strings, so data ending in a
      # terminator yields a final "" element.
      data.split(line_split_character, -1)
    else
      # Not viewable, or no data available: there are no lines to return.
      []
    end
end
# Separator used when breaking the data into lines: "\r" when Mac Format is
# detected, "\n" in every other case. The answer is computed once and cached
# in @line_split_character.
#
# Returns the split pattern as a String.
def line_split_character
  return @line_split_character if @line_split_character

  @line_split_character = mac_format? ? "\r" : "\n"
end
# Public: Is the data in Mac Format? This format uses \r (0x0d) characters
# for line ends and does not include a \n (0x0a).
#
# Only the first 4096 characters are searched for a \r. The decision is made
# from the first \r found: the data counts as Mac Format when that \r is not
# immediately followed by a \n.
#
# Returns true when Mac Format is detected, false when the first \r is
# followed by \n, and nil when the blob is not viewable, has no data, or has
# no \r within the searched prefix.
def mac_format?
  content = data if viewable?
  # Guard against missing data as well as non-viewable blobs, mirroring the
  # `viewable? && data` check in #lines, so calling this predicate directly
  # never raises on nil data.
  return unless content

  first_cr = content[0, 4096].index("\r")
  return unless first_cr

  # The character after the \r is read from the full data, not the 4096-char
  # prefix, so a \r sitting on the prefix boundary is still judged correctly.
  # NOTE(review): `?\n` (rather than "\n") presumably keeps this comparison
  # valid on Ruby 1.8, where String#[] returns a character code — confirm the
  # supported Ruby versions before changing it.
  content[first_cr + 1] != ?\n
end
# Public: Get number of lines of code
@@ -278,7 +302,7 @@ module Linguist
if defined?(@data) && @data.is_a?(String)
data = @data
else
data = lambda { binary_mime_type? ? "" : self.data }
data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
end
@language = Language.detect(name.to_s, data, mode)

View File

@@ -84,7 +84,9 @@ module Linguist
if possible_languages.length > 1
data = data.call() if data.respond_to?(:call)
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
if data.nil? || data == ""
nil
elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
else

View File

@@ -367,6 +367,14 @@ Ecere Projects:
extensions:
- .epj
Ecl:
type: programming
color: "#8a1267"
primary_extension: .ecl
lexer: ECL
extensions:
- .eclxml
Eiffel:
type: programming
lexer: Text only

View File

@@ -17,11 +17,11 @@
".h"
],
"C++": [
".h",
".hpp",
".cu",
".cc",
".cpp",
".cc"
".cu",
".h",
".hpp"
],
"Ceylon": [
".ceylon"
@@ -41,6 +41,9 @@
"Diff": [
".patch"
],
"Ecl": [
".ecl"
],
"Emacs Lisp": [
".el"
],
@@ -48,11 +51,11 @@
".s"
],
"Gosu": [
".gs",
".gsp",
".gst",
".gsx",
".vark",
".gs"
".vark"
],
"Groovy": [
".gradle",
@@ -75,9 +78,9 @@
".script!"
],
"JSON": [
".json",
".maxhelp",
".maxpat",
".json"
".maxpat"
],
"Julia": [
".jl"
@@ -130,14 +133,14 @@
".pir"
],
"Perl": [
".pm",
".pl",
".t",
".script!"
".pm",
".script!",
".t"
],
"PHP": [
".php",
".module"
".module",
".php"
],
"PowerShell": [
".ps1",
@@ -154,17 +157,17 @@
".R"
],
"Racket": [
".script!",
".scrbl"
".scrbl",
".script!"
],
"Rebol": [
".r"
],
"Ruby": [
".rb",
".script!",
".rabl",
".rake"
".rake",
".rb",
".script!"
],
"Rust": [
".rs"
@@ -180,17 +183,17 @@
".sps"
],
"Scilab": [
".sci",
".sce",
".sci",
".tst"
],
"SCSS": [
".scss"
],
"Shell": [
".bash",
".script!",
".sh",
".bash",
".zsh"
],
"Standard ML": [
@@ -257,7 +260,7 @@
".gemrc"
]
},
"tokens_total": 270906,
"tokens_total": 271187,
"languages_total": 275,
"tokens": {
"Apex": {
@@ -9157,6 +9160,84 @@
"d472341..8ad9ffb": 1,
"+": 3
},
"Ecl": {
"#option": 1,
"(": 32,
"true": 1,
")": 32,
";": 23,
"namesRecord": 4,
"RECORD": 1,
"string20": 1,
"surname": 1,
"string10": 2,
"forename": 1,
"integer2": 5,
"age": 2,
"dadAge": 1,
"mumAge": 1,
"END": 1,
"namesRecord2": 3,
"record": 1,
"extra": 1,
"end": 1,
"namesTable": 11,
"dataset": 2,
"FLAT": 2,
"namesTable2": 9,
"aveAgeL": 3,
"l": 1,
"l.dadAge": 1,
"+": 16,
"l.mumAge": 1,
"/2": 2,
"aveAgeR": 4,
"r": 1,
"r.dadAge": 1,
"r.mumAge": 1,
"output": 9,
"join": 11,
"left": 2,
"right": 3,
"//Several": 1,
"simple": 1,
"examples": 1,
"of": 1,
"sliding": 2,
"syntax": 1,
"left.age": 8,
"right.age": 12,
"-": 5,
"and": 10,
"<": 1,
"between": 7,
"//Same": 1,
"but": 1,
"on": 1,
"strings.": 1,
"Also": 1,
"includes": 1,
"to": 1,
"ensure": 1,
"sort": 1,
"is": 1,
"done": 1,
"by": 1,
"non": 1,
"before": 1,
"sliding.": 1,
"left.surname": 2,
"right.surname": 4,
"[": 4,
"]": 4,
"all": 1,
"//This": 1,
"should": 1,
"not": 1,
"generate": 1,
"a": 1,
"self": 1
},
"Emacs Lisp": {
"(": 1,
"print": 1,
@@ -27671,6 +27752,7 @@
"Dart": 68,
"Delphi": 30,
"Diff": 16,
"Ecl": 281,
"Emacs Lisp": 3,
"GAS": 133,
"Gosu": 413,
@@ -27741,6 +27823,7 @@
"Dart": 1,
"Delphi": 1,
"Diff": 1,
"Ecl": 1,
"Emacs Lisp": 1,
"GAS": 1,
"Gosu": 5,
@@ -27761,7 +27844,7 @@
"Nemerle": 1,
"Nimrod": 1,
"Nu": 1,
"Objective-C": 20,
"Objective-C": 19,
"OCaml": 1,
"Opa": 2,
"OpenCL": 1,
@@ -27798,5 +27881,5 @@
"XSLT": 1,
"YAML": 1
},
"md5": "73937d71c79ba8a2f9e085431017cb2d"
"md5": "8591cfa68ab6fe3b3dacbcb885be70d0"
}

View File

@@ -76,12 +76,14 @@ module Linguist
db['extnames'][language_name] ||= []
if !db['extnames'][language_name].include?(sample[:extname])
db['extnames'][language_name] << sample[:extname]
db['extnames'][language_name].sort!
end
end
if sample[:filename]
db['filenames'][language_name] ||= []
db['filenames'][language_name] << sample[:filename]
db['filenames'][language_name].sort!
end
data = File.read(sample[:path])

View File

@@ -1,13 +1,3 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(** This file is deprecated, for a tree on list, use [Mergesort.v]. *)
(** A development of Treesort on Heap trees. It has an average
complexity of O(n.log n) but of O() in the worst case (e.g. if
the list is already sorted) *)
@@ -88,9 +78,9 @@ Section defs.
forall P:Tree -> Type,
P Tree_Leaf ->
(forall (a:A) (T1 T2:Tree),
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
forall T:Tree, is_heap T -> P T.
Proof.
simple induction T; auto with datatypes.
@@ -105,9 +95,9 @@ Section defs.
forall P:Tree -> Set,
P Tree_Leaf ->
(forall (a:A) (T1 T2:Tree),
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
leA_Tree a T1 ->
leA_Tree a T2 ->
is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
forall T:Tree, is_heap T -> P T.
Proof.
simple induction T; auto with datatypes.
@@ -135,13 +125,13 @@ Section defs.
(forall a, HdRel leA a l1 -> HdRel leA a l2 -> HdRel leA a l) ->
merge_lem l1 l2.
Require Import Morphisms.
Instance: Equivalence (@meq A).
Proof. constructor; auto with datatypes. red. apply meq_trans. Defined.
Instance: Proper (@meq A ++> @meq _ ++> @meq _) (@munion A).
Proof. intros x y H x' y' H'. now apply meq_congr. Qed.
Lemma merge :
forall l1:list A, Sorted leA l1 ->
forall l2:list A, Sorted leA l2 -> merge_lem l1 l2.
@@ -150,8 +140,8 @@ Section defs.
apply merge_exist with l2; auto with datatypes.
rename l1 into l.
revert l2 H0. fix 1. intros.
destruct l2 as [|a0 l0].
apply merge_exist with (a :: l); simpl; auto with datatypes.
destruct l2 as [|a0 l0].
apply merge_exist with (a :: l); simpl; auto with datatypes.
elim (leA_dec a a0); intros.
(* 1 (leA a a0) *)
@@ -159,18 +149,18 @@ Section defs.
destruct (merge l H (a0 :: l0) H0).
apply merge_exist with (a :: l1). clear merge merge0.
auto using cons_sort, cons_leA with datatypes.
simpl. rewrite m. now rewrite munion_ass.
intros. apply cons_leA.
simpl. rewrite m. now rewrite munion_ass.
intros. apply cons_leA.
apply (@HdRel_inv _ leA) with l; trivial with datatypes.
(* 2 (leA a0 a) *)
apply Sorted_inv in H0. destruct H0.
destruct (merge0 l0 H0). clear merge merge0.
apply merge_exist with (a0 :: l1);
destruct (merge0 l0 H0). clear merge merge0.
apply merge_exist with (a0 :: l1);
auto using cons_sort, cons_leA with datatypes.
simpl; rewrite m. simpl. setoid_rewrite munion_ass at 1. rewrite munion_comm.
repeat rewrite munion_ass. setoid_rewrite munion_comm at 3. reflexivity.
intros. apply cons_leA.
intros. apply cons_leA.
apply (@HdRel_inv _ leA) with l0; trivial with datatypes.
Qed.
@@ -186,7 +176,7 @@ Section defs.
match t with
| Tree_Leaf => emptyBag
| Tree_Node a t1 t2 =>
munion (contents t1) (munion (contents t2) (singletonBag a))
munion (contents t1) (munion (contents t2) (singletonBag a))
end.
@@ -272,11 +262,11 @@ Section defs.
apply flat_exist with (a :: l); simpl; auto with datatypes.
apply meq_trans with
(munion (list_contents _ eqA_dec l1)
(munion (list_contents _ eqA_dec l2) (singletonBag a))).
(munion (list_contents _ eqA_dec l2) (singletonBag a))).
apply meq_congr; auto with datatypes.
apply meq_trans with
(munion (singletonBag a)
(munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
(munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
apply munion_rotate.
apply meq_right; apply meq_sym; trivial with datatypes.
Qed.

View File

@@ -1,11 +1,3 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
Require Import Omega Relations Multiset SetoidList.
(** This file is deprecated, use [Permutation.v] instead.
@@ -154,7 +146,7 @@ Lemma permut_add_cons_inside :
Proof.
intros;
replace (a :: l) with ([] ++ a :: l); trivial;
apply permut_add_inside; trivial.
apply permut_add_inside; trivial.
Qed.
Lemma permut_middle :
@@ -168,8 +160,8 @@ Lemma permut_sym_app :
Proof.
intros l1 l2;
unfold permutation, meq;
intro a; do 2 rewrite list_contents_app; simpl;
auto with arith.
intro a; do 2 rewrite list_contents_app; simpl;
auto with arith.
Qed.
Lemma permut_rev :

View File

@@ -1,17 +1,5 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
(*********************************************************************)
(** * List permutations as a composition of adjacent transpositions *)
(*********************************************************************)
(* Adapted in May 2006 by Jean-Marc Notin from initial contents by
Laurent Théry (Huffmann contribution, October 2003) *)
Laurent Thery (Huffmann contribution, October 2003) *)
Require Import List Setoid Compare_dec Morphisms.
Import ListNotations. (* For notations [] and [a;b;c] *)

View File

@@ -1,10 +1,2 @@
(************************************************************************)
(* v * The Coq Proof Assistant / The Coq Development Team *)
(* <O___,, * INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010 *)
(* \VV/ **************************************************************)
(* // * This file is distributed under the terms of the *)
(* * GNU Lesser General Public License Version 2.1 *)
(************************************************************************)
Require Export Sorted.
Require Export Mergesort.

42
samples/Ecl/sample.ecl Normal file
View File

@@ -0,0 +1,42 @@
/*
* Multi-line comment
*/
#option ('slidingJoins', true);
namesRecord :=
RECORD
string20 surname;
string10 forename;
integer2 age;
integer2 dadAge;
integer2 mumAge;
END;
namesRecord2 :=
record
string10 extra;
namesRecord;
end;
namesTable := dataset('x',namesRecord,FLAT);
namesTable2 := dataset('y',namesRecord2,FLAT);
integer2 aveAgeL(namesRecord l) := (l.dadAge+l.mumAge)/2;
integer2 aveAgeR(namesRecord2 r) := (r.dadAge+r.mumAge)/2;
// Standard join on a function of left and right
output(join(namesTable, namesTable2, aveAgeL(left) = aveAgeR(right)));
//Several simple examples of sliding join syntax
output(join(namesTable, namesTable2, left.age >= right.age - 10 and left.age <= right.age +10));
output(join(namesTable, namesTable2, left.age between right.age - 10 and right.age +10));
output(join(namesTable, namesTable2, left.age between right.age + 10 and right.age +30));
output(join(namesTable, namesTable2, left.age between (right.age + 20) - 10 and (right.age +20) + 10));
output(join(namesTable, namesTable2, aveAgeL(left) between aveAgeR(right)+10 and aveAgeR(right)+40));
//Same, but on strings. Also includes age to ensure sort is done by non-sliding before sliding.
output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age));
output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age,all));
//This should not generate a self join
output(join(namesTable, namesTable, left.age between right.age - 10 and right.age +10));

1
samples/Text/mac.txt Normal file
View File

@@ -0,0 +1 @@
line 1

View File

@@ -65,6 +65,14 @@ class TestBlob < Test::Unit::TestCase
assert_equal ["module Foo", "end", ""], blob("Ruby/foo.rb").lines
end
# The Text/mac.txt sample must be reported as Mac Format.
def test_mac_format
  mac_blob = blob("Text/mac.txt")
  assert mac_blob.mac_format?
end
# Splitting the Text/mac.txt sample must give two text lines plus the trailing
# empty string.
def test_lines_mac_format
  expected = ["line 1", "line 2", ""]
  assert_equal expected, blob("Text/mac.txt").lines
end
def test_size
assert_equal 15, blob("Ruby/foo.rb").size
end

View File

@@ -1,4 +1,6 @@
require 'linguist/samples'
require 'tempfile'
require 'yajl'
require 'test/unit'
@@ -12,6 +14,19 @@ class TestSamples < Test::Unit::TestCase
# Just warn, it shouldn't scare people off by breaking the build.
if serialized['md5'] != latest['md5']
warn "Samples database is out of date. Run `bundle exec rake samples`."
expected = Tempfile.new('expected.json')
expected.write Yajl::Encoder.encode(serialized, :pretty => true)
expected.close
actual = Tempfile.new('actual.json')
actual.write Yajl::Encoder.encode(latest, :pretty => true)
actual.close
warn `diff #{expected.path} #{actual.path}`
expected.unlink
actual.unlink
end
end