From 5d4057324faf5336bffe0e436bd4f0dad52cb5c3 Mon Sep 17 00:00:00 2001 From: Tristan Hume Date: Wed, 16 Nov 2011 18:55:55 -0500 Subject: [PATCH 1/3] Added Turing language detection --- lib/linguist/blob_helper.rb | 14 ++++++++++++++ lib/linguist/languages.yml | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index be4483cf..f8149c82 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -467,6 +467,20 @@ module Linguist Language['R'] end end + + # Internal: Guess language of .t files. + # + # Makes fairly sure that it is Turing. + # Turing is not very popular so it would not be good to have perl users' files being confused. + # + # Returns a Language. + def guess_t_language + if lines.grep(/:=/).any? && lines.grep(/proc |procedure |fcn |function /).any? && lines.grep(/var/).any? + Language['Turing'] + else + Language['Perl'] + end + end # Internal: Guess language of .gsp files. # diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index a7de46f3..36748a7d 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -717,6 +717,7 @@ Perl: type: programming overrides: - .pl + - .t primary_extension: .pl extensions: - .PL @@ -965,6 +966,14 @@ Textile: lexer: Text only extensions: - .textile + +Turing: + type: programming + lexer: Text only + primary_extension: .t + extensions: + - .t + - .tu Twig: type: markup From 0b5a2656442a99e174dfc1faec592c855f8b15ad Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Mon, 21 Nov 2011 10:20:19 -0600 Subject: [PATCH 2/3] Cleanup whitespace --- lib/linguist/blob_helper.rb | 2 +- lib/linguist/languages.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index f8149c82..723f98ce 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -467,7 +467,7 @@ module Linguist Language['R'] end end - + # Internal: Guess language of .t files. # # Makes fairly sure that it is Turing. diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 465f1da4..f03bacd8 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -972,7 +972,7 @@ Textile: lexer: Text only extensions: - .textile - + Turing: type: programming lexer: Text only From e4fe1d17e7263af735f1d96ed80c9d932220c869 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Mon, 21 Nov 2011 10:42:39 -0600 Subject: [PATCH 3/3] Add tests for Perl and Turning detection --- lib/linguist/blob_helper.rb | 16 ++++++++++++---- test/fixtures/perl-test.t | 10 ++++++++++ test/fixtures/turing.t | 19 +++++++++++++++++++ test/test_blob.rb | 4 ++++ test/test_language.rb | 3 +++ 5 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 test/fixtures/perl-test.t create mode 100644 test/fixtures/turing.t diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 723f98ce..64497ca5 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -470,12 +470,20 @@ module Linguist # Internal: Guess language of .t files. # - # Makes fairly sure that it is Turing. - # Turing is not very popular so it would not be good to have perl users' files being confused. - # # Returns a Language. def guess_t_language - if lines.grep(/:=/).any? && lines.grep(/proc |procedure |fcn |function /).any? && lines.grep(/var/).any? + score = 0 + score += 1 if lines.grep(/^% /).any? + score += data.gsub(/ := /).count + score += data.gsub(/proc |procedure |fcn |function /).count + score += data.gsub(/var \w+: \w+/).count + + # Tell-tale signs its gotta be Perl + if lines.grep(/^(my )?(sub |\$|@|%)\w+/).any? + score = 0 + end + + if score >= 3 Language['Turing'] else Language['Perl'] diff --git a/test/fixtures/perl-test.t b/test/fixtures/perl-test.t new file mode 100644 index 00000000..a234f9a7 --- /dev/null +++ b/test/fixtures/perl-test.t @@ -0,0 +1,10 @@ +use strict; +use warnings; + +use Foo::Bar + +$n = 42; +$name = "world"; +@array = ("1","2","3"); +%hash = ("foo":"bar"); +my $name = "josh"; diff --git a/test/fixtures/turing.t b/test/fixtures/turing.t new file mode 100644 index 00000000..c68f8eee --- /dev/null +++ b/test/fixtures/turing.t @@ -0,0 +1,19 @@ +% Accepts a number and calculates its factorial + +function factorial (n: int) : real + if n = 0 then + result 1 + else + result n * factorial (n - 1) + end if +end factorial + +var n: int +loop + put "Please input an integer: " .. + get n + exit when n >= 0 + put "Input must be a non-negative integer." +end loop + +put "The factorial of ", n, " is ", factorial (n) diff --git a/test/test_blob.rb b/test/test_blob.rb index ac426842..5d00078c 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -281,6 +281,10 @@ class TestBlob < Test::Unit::TestCase assert_equal Language['R'], blob("hello-r.R").language assert_equal Language['Rebol'], blob("hello-rebol.r").language + # .t disambiguation + assert_equal Language['Perl'], blob("perl-test.t").language + assert_equal Language['Turing'], blob("turing.t").language + # ML assert_equal Language['OCaml'], blob("Foo.ml").language assert_equal Language['Standard ML'], blob("Foo.sig").language diff --git a/test/test_language.rb b/test/test_language.rb index 96eb88f2..1222b864 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -20,6 +20,9 @@ class TestLanguage < Test::Unit::TestCase assert Language.ambiguous?('.r') assert_equal Language['R'], Language.find_by_extension('r') + + assert Language.ambiguous?('.t') + assert_equal Language['Perl'], Language.find_by_extension('t') end def test_lexer