Merge branch 'master' into 1515-local

This commit is contained in:
Arfon Smith
2014-09-30 08:38:26 -05:00
26 changed files with 457 additions and 74022 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@ Gemfile.lock
.bundle/
vendor/
benchmark/
lib/linguist/samples.json

View File

@@ -3,9 +3,6 @@ before_install:
- git fetch origin v2.0.0:v2.0.0
- git fetch origin test/attributes:test/attributes
- sudo apt-get install libicu-dev -y
- gem update --system 2.1.11
before_script:
- bundle exec rake samples
rvm:
- 1.9.3
- 2.0.0

View File

@@ -102,10 +102,6 @@ We try to only add languages once they have some usage on GitHub, so please note
Almost all bug fixes or new language additions should come with some additional code samples. Just drop them under [`samples/`](https://github.com/github/linguist/tree/master/samples) in the correct subdirectory and our test suite will automatically test them. In most cases you shouldn't need to add any new assertions.
To update the `samples.json` after adding new files to [`samples/`](https://github.com/github/linguist/tree/master/samples):
bundle exec rake samples
### A note on language extensions
Linguist has a number of methods available to it for identifying the language of a particular file. The initial lookup is based upon the extension of the file; possible file extensions are defined in an array called `extensions`. Take a look at this example for `Perl`:

View File

@@ -7,6 +7,16 @@ task :default => :test
Rake::TestTask.new
# Extend test task to check for samples
task :test => :check_samples
desc "Check that we have samples.json generated"
task :check_samples do
  # Generate the serialized samples file on demand: the :samples task is
  # only invoked when the JSON file is not already on disk.
  samples_present = File.exist?('lib/linguist/samples.json')
  Rake::Task[:samples].invoke if !samples_present
end
task :samples do
require 'linguist/samples'
require 'yajl'
@@ -15,7 +25,7 @@ task :samples do
File.open('lib/linguist/samples.json', 'w') { |io| io.write json }
end
task :build_gem do
task :build_gem => :samples do
languages = YAML.load_file("lib/linguist/languages.yml")
File.write("lib/linguist/languages.json", JSON.dump(languages))
`gem build github-linguist.gemspec`
@@ -98,7 +108,7 @@ namespace :classifier do
next if file_language.nil? || file_language == 'Text'
begin
data = open(file_url).read
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first
total += 1
guessed_language == file_language ? correct += 1 : incorrect += 1

View File

@@ -63,6 +63,7 @@ module Linguist
generated_jni_header? ||
composer_lock? ||
node_modules? ||
godeps? ||
vcr_cassette? ||
generated_by_zephir?
end
@@ -231,6 +232,14 @@ module Linguist
!!name.match(/node_modules\//)
end
# Internal: Is the blob inside a Godeps/ directory?
#
# Godeps/ holds dependencies saved by godep, which are not meant for
# humans to review in pull requests. The directory name is matched as a
# whole path segment (at the start of the path or right after a "/"), so a
# directory whose name merely ends in "Godeps" (e.g. "MyGodeps/") is not
# mistaken for generated content.
#
# Returns true or false.
def godeps?
  !!(name =~ %r{(?:\A|/)Godeps/})
end
# Internal: Is the blob a generated php composer lock file?
#
# Returns true or false.

View File

@@ -19,6 +19,9 @@ module Linguist
if languages.all? { |l| ["ECL", "Prolog"].include?(l) }
result = disambiguate_ecl(data, languages)
end
if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) }
result = disambiguate_cl(data, languages)
end
return result
end
end

View File

@@ -136,7 +136,7 @@ module Linguist
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based of the string language name (i.e., first element of `#classify`)
Language[classified[0]]
end
@@ -510,9 +510,9 @@ module Linguist
end
end
extensions = Samples::DATA['extnames']
interpreters = Samples::DATA['interpreters']
filenames = Samples::DATA['filenames']
extensions = Samples.cache['extnames']
interpreters = Samples.cache['interpreters']
filenames = Samples.cache['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
languages_yml = File.expand_path("../languages.yml", __FILE__)

View File

@@ -748,6 +748,7 @@ Forth:
- .fth
- .4th
- .forth
- .frt
Frege:
type: programming
@@ -891,6 +892,12 @@ Grammatical Framework:
searchable: true
color: "#ff0000"
Graph Modeling Language:
type: data
lexer: Text only
extensions:
- .gml
Groff:
extensions:
- .man
@@ -1163,6 +1170,7 @@ JavaScript:
- .es6
- .frag
- .jake
- .jsb
- .jsfl
- .jsm
- .jss
@@ -1718,6 +1726,7 @@ Pascal:
- .dfm
- .dpr
- .lpr
- .pp
Perl:
type: programming

File diff suppressed because it is too large Load Diff

View File

@@ -17,9 +17,11 @@ module Linguist
PATH = File.expand_path('../samples.json', __FILE__)
# Hash of serialized samples object
if File.exist?(PATH)
serializer = defined?(JSON) ? JSON : YAML
DATA = serializer.load(File.read(PATH))
# Public: Load the serialized samples data stored at PATH.
#
# The file is read and deserialized on first use (with JSON when it is
# loaded, otherwise YAML) and the result is memoized for later calls.
#
# Returns the deserialized samples object.
def self.cache
  return @cache if @cache

  loader = defined?(JSON) ? JSON : YAML
  @cache = loader.load(File.read(PATH))
end
# Public: Iterate over each sample.

View File

@@ -33,6 +33,9 @@
# Erlang bundles
- ^rebar$
# Go dependencies
- Godeps/_workspace/
# Bootstrap minified css and js
- (^|/)bootstrap([^.]*)(\.min)?\.(js|css)$
@@ -235,3 +238,7 @@
- octicons.css
- octicons.min.css
- sprockets-octicons.scss
# Typesafe Activator
- (^|/)activator$
- (^|/)activator\.bat$

8
samples/Forth/bitmap.frt Normal file
View File

@@ -0,0 +1,8 @@
\ Bit arrays.
: bits ( u1 -- u2 ) 7 + 3 rshift ;
: bitmap ( u "name" -- ) create bits here over erase allot
does> ( u -- a x ) over 3 rshift + 1 rot 7 and lshift ;
: bit@ ( a x -- f ) swap c@ and ;
: 1bit ( a x -- ) over c@ or swap c! ;
: 0bit ( a x -- ) invert over c@ and swap c! ;
: bit! ( f a x -- ) rot if 1bit else 0bit then ;

7
samples/Forth/enum.frt Normal file
View File

@@ -0,0 +1,7 @@
\ Implements ENUM.
\ Double DOES>!
: enum create 0 , does> create dup @ 1 rot +! , does> @ ;
\ But this is simpler.
: enum create 0 , does> dup @ constant 1 swap +! ;

8
samples/Forth/macros.frt Normal file
View File

@@ -0,0 +1,8 @@
\ Simplifies compiling words.
: [[ ; immediate
: '<> >in @ ' swap >in ! <> ;
: (]]) begin dup '<> while postpone postpone repeat drop ;
: ]] ['] [[ (]]) ; immediate
( Usage: : foo ]] dup * [[ ; immediate : bar 42 foo . ; )

View File

@@ -0,0 +1,21 @@
graph
[
directed 0
node
[
id 0
label "Node 1"
value 100
]
node
[
id 1
label "Node 2"
value 200
]
edge
[
source 1
target 0
]
]

13
samples/Groff/sample.4 Normal file
View File

@@ -0,0 +1,13 @@
.TH FOO 1
.SH NAME
foo \- bar
.SH SYNOPSIS
.B foo
.I bar
.SH DESCRIPTION
Foo bar
.BR baz
quux.
.PP
.B Foo
bar baz.

View File

@@ -0,0 +1,12 @@
jsb.library('mylibrary', jsb.STATIC_LIBRARY, function(libObject) {
libObject.outputName = 'mylibrary';
libObject.cflags = [ '-Wall' ];
libObject.ldflags = [ '-pthread' ];
libObject.includePaths = [ 'src/include' ];
libObject.sources = [
'src/main.cpp',
'src/app.cpp'
];
});
jsb.build();

193
samples/Pascal/custforms.pp Normal file
View File

@@ -0,0 +1,193 @@
unit custforms;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Forms;
Type
{ TCustomFormDescr }
TCustomFormDescr = Class
private
FAuthor: String;
FCaption: String;
FCategory: String;
FDescription: String;
FFormClass: TFormClass;
FLazPackage: String;
FUnitName: String;
public
Constructor Create(AFormClass : TFormClass; const APackage: string);
Constructor Create(AFormClass : TFormClass; Const ACaption,ADescription,AUnit,APackage : String);
Property FormClass : TFormClass Read FFormClass Write FFormClass;
Property Caption : String Read FCaption Write FCaption;
Property Description : String Read FDescription Write FDescription;
Property UnitName : String Read FUnitName Write FUnitName;
Property Category : String Read FCategory Write FCategory;
Property Author : String Read FAuthor Write FAuthor;
Property LazPackage : String Read FLazPackage Write FLazPackage;
end;
Procedure RegisterCustomForm(Descr : TCustomFormDescr);
Procedure RegisterCustomForm(AFormClass : TFormClass; const APackage: string);
Procedure RegisterCustomForm(AFormClass : TFormClass; Const AUnitName, APackage : String);
Procedure Register;
implementation
uses ProjectIntf,NewItemIntf,contnrs;
Const
SAppFrameWork = 'Custom forms';
SInstanceOf = 'Create a new instance of %s';
{ TCustomFormDescr }
constructor TCustomFormDescr.Create(AFormClass: TFormClass;
const APackage: string);
Var
N,U : String;
begin
N:=AFormClass.ClassName;
U:=N;
If (Upcase(U[1])='T') then
Delete(U,1,1);
Create(AFormClass,N,Format(SInstanceOf,[N]),U,APackage);
end;
constructor TCustomFormDescr.Create(AFormClass: TFormClass;
const ACaption, ADescription, AUnit, APackage: String);
begin
FFormClass:=AFormClass;
FCaption:=ACaption;
FDescription:=ADescription;
FUnitName:=AUnit;
FCategory:=SAppFrameWork;
FLazPackage:=APackage;
end;
// Registration code.
Type
{ TCustomFormFileDescriptor }
TCustomFormFileDescriptor = Class(TFileDescPascalUnitWithResource)
private
FFormDescr: TCustomFormDescr;
Public
Constructor Create(ADescr : TCustomFormDescr);
Property FormDescr : TCustomFormDescr Read FFormDescr;
Function GetLocalizedName : String; override;
Function GetLocalizedDescription : String; override;
Function GetInterfaceUsesSection : String; override;
end;
{ TCustomFormFileDescriptor }
constructor TCustomFormFileDescriptor.Create(ADescr: TCustomFormDescr);
begin
Inherited Create;
FFormDescr:=ADescr;
ResourceClass:=FFormDescr.FFormClass;
Name:=FFormDescr.Caption;
RequiredPackages:=ADescr.LazPackage;
//Writeln('TCustomFormFileDescriptor.Create RequiredPackages=',RequiredPackages);
end;
function TCustomFormFileDescriptor.GetLocalizedName: String;
begin
Result:=FFormDescr.Caption;
end;
function TCustomFormFileDescriptor.GetLocalizedDescription: String;
begin
Result:=FFormDescr.Description;
If (FFormDescr.Author<>'') then
Result:=Result+LineEnding+'By '+FFormDescr.Author;
end;
function TCustomFormFileDescriptor.GetInterfaceUsesSection: String;
begin
Result:=inherited GetInterfaceUsesSection;
Result:=Result+',Forms,'+FFormDescr.UnitName;
end;
Var
CustomFormList : TObjectList;
Procedure RegisterCustomForm(Descr : TCustomFormDescr);
begin
CustomFormList.Add(Descr);
end;
Procedure RegisterCustomForm(AFormClass : TFormClass; const APackage: string);
begin
RegisterCustomForm(TCustomFormDescr.Create(AFormClass,APackage));
end;
Procedure RegisterCustomForm(AFormClass : TFormClass; Const AUnitName, APackage : String);
Var
D : TCustomFormDescr;
begin
D:=TCustomFormDescr.Create(AFormClass,APackage);
D.UnitName:=AUnitName;
RegisterCustomForm(D);
end;
Procedure Register;
Var
L : TStringList;
I : Integer;
D : TCustomFormDescr;
begin
L:=TStringList.Create;
Try
L.Sorted:=True;
L.Duplicates:=dupIgnore;
For I:=0 to CustomFormList.Count-1 do
L.Add(TCustomFormDescr(CustomFormList[i]).Category);
For I:=0 to L.Count-1 do
begin
RegisterNewItemCategory(TNewIDEItemCategory.Create(L[i]));
end;
Finally
L.Free;
end;
For I:=0 to CustomFormList.Count-1 do
begin
D:=TCustomFormDescr(CustomFormList[i]);
RegisterProjectFileDescriptor(TCustomFormFileDescriptor.Create(D),D.Category);
end;
end;
Procedure InitCustomForms;
begin
CustomFormList:=TObjectList.Create;
end;
Procedure DoneCustomForms;
begin
FreeAndNil(CustomFormList);
end;
Initialization
InitCustomForms;
Finalization
DoneCustomForms;
end.

View File

@@ -0,0 +1,51 @@
{ $Id$ }
{
---------------------------------------------------------------------------
gtkextra.pp - GTK(2) widgetset - additional gdk/gtk functions
---------------------------------------------------------------------------
This unit contains missing gdk/gtk functions and defines for certain
versions of gtk or fpc.
---------------------------------------------------------------------------
@created(Sun Jan 28th WET 2006)
@lastmod($Date$)
@author(Marc Weustink <marc@@dommelstein.nl>)
*****************************************************************************
This file is part of the Lazarus Component Library (LCL)
See the file COPYING.modifiedLGPL.txt, included in this distribution,
for details about the license.
*****************************************************************************
}
unit GtkExtra;
{$mode objfpc}{$H+}
interface
{$I gtkdefines.inc}
{$ifdef gtk1}
{$I gtk1extrah.inc}
{$endif}
{$ifdef gtk2}
{$I gtk2extrah.inc}
{$endif}
implementation
{$ifdef gtk1}
{$I gtk1extra.inc}
{$endif}
{$ifdef gtk2}
{$I gtk2extra.inc}
{$endif}
end.

View File

@@ -0,0 +1,26 @@
define example::expiringhost($ip, $timestamp) {
# Calculate the age of this resource by comparing 'now' against $timestamp
$age = inline_template("<%= require 'time'; Time.now - Time.parse(timestamp) %>")
# Max age, in seconds.
$maxage = 60
if $age > $maxage {
$expired = true
notice("Expiring resource $class[$name] due to age > $maxage (actual: $age)")
} else {
$expired = false
notice("Found recently-active $class[$name] (age: $age)")
}
# I set target to a /tmp path so you can run this example as non-root.
# In production, you probably won't set target as it defaults to /etc/hosts
# (or wherever puppet thinks your platform wants it)
host {
$name:
ip => $ip,
target => "/tmp/expiring-hosts-example-output",
ensure => $expired ? { true => absent, false => present };
}
}

View File

@@ -0,0 +1,26 @@
class foo {
notify {
"foo": ;
}
}
class bar {
notify {
"bar": ;
}
}
node default {
stage {
"one": ;
"two": ;
}
class {
"foo": stage => "one";
"bar": stage => "two";
}
Stage["one"] -> Stage["two"]
}

View File

@@ -0,0 +1,22 @@
# Manually manage /tmp/original
# Each puppet run will copy it to /tmp/flag if there's a change and notify
# the exec when it changes.
#
# The idea here is you might need (in some case) to manually manage a file outside
# of puppet (in this case, "/tmp/original"). Using this example, you can make puppet
# signal other parts of your catalog based on changes to that file.
file {
# This will, when different, copy /tmp/original to /tmp/flag and notify our
# exec.
"/tmp/flag":
source => "file:///tmp/original",
notify => Exec["hello world"];
}
exec {
"hello world":
command => "/bin/echo hello world",
refreshonly => true;
}

View File

@@ -262,6 +262,10 @@ class TestBlob < Test::Unit::TestCase
assert Linguist::Generated.generated?("node_modules/grunt/lib/grunt.js", nil)
# Godep saved dependencies
assert blob("Godeps/Godeps.json").generated?
assert blob("Godeps/_workspace/src/github.com/kr/s3/sign.go").generated?
end
def test_vendored
@@ -279,6 +283,10 @@ class TestBlob < Test::Unit::TestCase
assert blob("app/bower_components/custom/custom.js").vendored?
assert blob("vendor/assets/bower_components/custom/custom.js").vendored?
# Go dependencies
assert !blob("Godeps/Godeps.json").vendored?
assert blob("Godeps/_workspace/src/github.com/kr/s3/sign.go").vendored?
# Rails vendor/
assert blob("vendor/plugins/will_paginate/lib/will_paginate.rb").vendored?
@@ -437,6 +445,12 @@ class TestBlob < Test::Unit::TestCase
assert blob("octicons.css").vendored?
assert blob("public/octicons.min.css").vendored?
assert blob("public/octicons/sprockets-octicons.scss").vendored?
# Typesafe Activator
assert blob("activator").vendored?
assert blob("activator.bat").vendored?
assert blob("subproject/activator").vendored?
assert blob("subproject/activator.bat").vendored?
end
def test_language

View File

@@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase
end
def test_instance_classify_empty
results = Classifier.classify(Samples::DATA, "")
results = Classifier.classify(Samples.cache, "")
assert results.first[1] < 0.5, results.first.inspect
end
def test_instance_classify_nil
assert_equal [], Classifier.classify(Samples::DATA, nil)
assert_equal [], Classifier.classify(Samples.cache, nil)
end
def test_classify_ambiguous_languages
@@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase
languages = Language.find_by_filename(sample[:path]).map(&:name)
next unless languages.length > 1
results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages)
results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages)
assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}"
end
end

View File

@@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase
include Linguist
def test_up_to_date
assert serialized = Samples::DATA
assert serialized = Samples.cache
assert latest = Samples.data
# Just warn, it shouldn't scare people off by breaking the build.
@@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase
end
def test_verify
assert data = Samples::DATA
assert data = Samples.cache
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
@@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
def test_parity
extensions = Samples::DATA['extnames']
extensions = Samples.cache['extnames']
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
languages = YAML.load_file(languages_yml)