From 7702583314e4fbd2795a58a90c0d68ea59c19089 Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Sun, 16 Nov 2014 07:19:55 -0700 Subject: [PATCH 1/2] Python also supports triple single-quotes for comments --- lib/linguist/tokenizer.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb index 4b2ea607..05882649 100644 --- a/lib/linguist/tokenizer.rb +++ b/lib/linguist/tokenizer.rb @@ -33,7 +33,8 @@ module Linguist ['<!--', '-->'], # XML ['{-', '-}'], # Haskell ['(*', '*)'], # Coq - ['"""', '"""'] # Python + ['"""', '"""'], # Python + ["'''", "'''"] # Python ] START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c| From c5a654e692240557a05a2de76119aad006da87df Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 25 Nov 2014 20:01:24 -0500 Subject: [PATCH 2/2] Tests for Python multiline comments during tokenization --- test/test_tokenizer.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb index 0521f4da..a460afc3 100644 --- a/test/test_tokenizer.rb +++ b/test/test_tokenizer.rb @@ -43,6 +43,8 @@ class TestTokenizer < Test::Unit::TestCase assert_equal %w(foo), tokenize("foo {- Comment -}") assert_equal %w(foo), tokenize("foo (* Comment *)") assert_equal %w(%), tokenize("2 % 10\n% Comment") + assert_equal %w(foo bar), tokenize("foo\n\"\"\"\nComment\n\"\"\"\nbar") + assert_equal %w(foo bar), tokenize("foo\n'''\nComment\n'''\nbar") end def test_sgml_tags