mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Skip emitting comment tokens
This commit is contained in:
		@@ -16,12 +16,15 @@ module Linguist
 | 
				
			|||||||
      new.extract_tokens(data)
 | 
					      new.extract_tokens(data)
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Start state on token, ignore anything till the next newline
 | 
				
			||||||
    SINGLE_LINE_COMMENTS = [
 | 
					    SINGLE_LINE_COMMENTS = [
 | 
				
			||||||
      '//', # C
 | 
					      '//', # C
 | 
				
			||||||
      '#',  # Ruby
 | 
					      '#',  # Ruby
 | 
				
			||||||
      '%',  # Tex
 | 
					      '%',  # Tex
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Start state on opening token, ignore anything until the closing
 | 
				
			||||||
 | 
					    # token is reached.
 | 
				
			||||||
    MULTI_LINE_COMMENTS = [
 | 
					    MULTI_LINE_COMMENTS = [
 | 
				
			||||||
      ['/*', '*/'],    # C
 | 
					      ['/*', '*/'],    # C
 | 
				
			||||||
      ['<!--', '-->'], # XML
 | 
					      ['<!--', '-->'], # XML
 | 
				
			||||||
@@ -30,7 +33,7 @@ module Linguist
 | 
				
			|||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    START_SINGLE_LINE_COMMENT =  Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
 | 
					    START_SINGLE_LINE_COMMENT =  Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
 | 
				
			||||||
      "^\s*#{Regexp.escape(c)} "
 | 
					      "\s*#{Regexp.escape(c)} "
 | 
				
			||||||
    }.join("|"))
 | 
					    }.join("|"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    START_MULTI_LINE_COMMENT =  Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
 | 
					    START_MULTI_LINE_COMMENT =  Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
 | 
				
			||||||
@@ -58,16 +61,16 @@ module Linguist
 | 
				
			|||||||
          end
 | 
					          end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Single line comment
 | 
					        # Single line comment
 | 
				
			||||||
        elsif token = s.scan(START_SINGLE_LINE_COMMENT)
 | 
					        elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
 | 
				
			||||||
          tokens << token.strip
 | 
					          # tokens << token.strip
 | 
				
			||||||
          s.skip_until(/\n|\Z/)
 | 
					          s.skip_until(/\n|\Z/)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Multiline comments
 | 
					        # Multiline comments
 | 
				
			||||||
        elsif token = s.scan(START_MULTI_LINE_COMMENT)
 | 
					        elsif token = s.scan(START_MULTI_LINE_COMMENT)
 | 
				
			||||||
          tokens << token
 | 
					          # tokens << token
 | 
				
			||||||
          close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
 | 
					          close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
 | 
				
			||||||
          s.skip_until(Regexp.compile(Regexp.escape(close_token)))
 | 
					          s.skip_until(Regexp.compile(Regexp.escape(close_token)))
 | 
				
			||||||
          tokens << close_token
 | 
					          # tokens << close_token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Skip single or double quoted strings
 | 
					        # Skip single or double quoted strings
 | 
				
			||||||
        elsif s.scan(/"/)
 | 
					        elsif s.scan(/"/)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,15 +34,15 @@ class TestTokenizer < Test::Unit::TestCase
 | 
				
			|||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_skip_comments
 | 
					  def test_skip_comments
 | 
				
			||||||
    assert_equal %w(foo #), tokenize("foo\n# Comment")
 | 
					    assert_equal %w(foo), tokenize("foo\n# Comment")
 | 
				
			||||||
    assert_equal %w(foo # bar), tokenize("foo\n# Comment\nbar")
 | 
					    assert_equal %w(foo bar), tokenize("foo\n# Comment\nbar")
 | 
				
			||||||
    assert_equal %w(foo //), tokenize("foo\n// Comment")
 | 
					    assert_equal %w(foo), tokenize("foo\n// Comment")
 | 
				
			||||||
    assert_equal %w(foo /* */), tokenize("foo /* Comment */")
 | 
					    assert_equal %w(foo), tokenize("foo /* Comment */")
 | 
				
			||||||
    assert_equal %w(foo /* */), tokenize("foo /* \nComment\n */")
 | 
					    assert_equal %w(foo), tokenize("foo /* \nComment\n */")
 | 
				
			||||||
    assert_equal %w(foo <!-- -->), tokenize("foo <!-- Comment -->")
 | 
					    assert_equal %w(foo), tokenize("foo <!-- Comment -->")
 | 
				
			||||||
    assert_equal %w(foo {- -}), tokenize("foo {- Comment -}")
 | 
					    assert_equal %w(foo), tokenize("foo {- Comment -}")
 | 
				
			||||||
    assert_equal %w(foo \(* *\)), tokenize("foo (* Comment *)")
 | 
					    assert_equal %w(foo), tokenize("foo (* Comment *)")
 | 
				
			||||||
    assert_equal %w(% %), tokenize("2 % 10\n% Comment")
 | 
					    assert_equal %w(%), tokenize("2 % 10\n% Comment")
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_sgml_tags
 | 
					  def test_sgml_tags
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user