mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
Skip emitting comment tokens
This commit is contained in:
@@ -16,12 +16,15 @@ module Linguist
|
|||||||
new.extract_tokens(data)
|
new.extract_tokens(data)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Start state on token, ignore anything till the next newline
|
||||||
SINGLE_LINE_COMMENTS = [
|
SINGLE_LINE_COMMENTS = [
|
||||||
'//', # C
|
'//', # C
|
||||||
'#', # Ruby
|
'#', # Ruby
|
||||||
'%', # Tex
|
'%', # Tex
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Start state on opening token, ignore anything until the closing
|
||||||
|
# token is reached.
|
||||||
MULTI_LINE_COMMENTS = [
|
MULTI_LINE_COMMENTS = [
|
||||||
['/*', '*/'], # C
|
['/*', '*/'], # C
|
||||||
['<!--', '-->'], # XML
|
['<!--', '-->'], # XML
|
||||||
@@ -30,7 +33,7 @@ module Linguist
|
|||||||
]
|
]
|
||||||
|
|
||||||
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
|
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
|
||||||
"^\s*#{Regexp.escape(c)} "
|
"\s*#{Regexp.escape(c)} "
|
||||||
}.join("|"))
|
}.join("|"))
|
||||||
|
|
||||||
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
|
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
|
||||||
@@ -58,16 +61,16 @@ module Linguist
|
|||||||
end
|
end
|
||||||
|
|
||||||
# Single line comment
|
# Single line comment
|
||||||
elsif token = s.scan(START_SINGLE_LINE_COMMENT)
|
elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
|
||||||
tokens << token.strip
|
# tokens << token.strip
|
||||||
s.skip_until(/\n|\Z/)
|
s.skip_until(/\n|\Z/)
|
||||||
|
|
||||||
# Multiline comments
|
# Multiline comments
|
||||||
elsif token = s.scan(START_MULTI_LINE_COMMENT)
|
elsif token = s.scan(START_MULTI_LINE_COMMENT)
|
||||||
tokens << token
|
# tokens << token
|
||||||
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
|
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
|
||||||
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
|
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
|
||||||
tokens << close_token
|
# tokens << close_token
|
||||||
|
|
||||||
# Skip single or double quoted strings
|
# Skip single or double quoted strings
|
||||||
elsif s.scan(/"/)
|
elsif s.scan(/"/)
|
||||||
|
|||||||
@@ -34,15 +34,15 @@ class TestTokenizer < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_skip_comments
|
def test_skip_comments
|
||||||
assert_equal %w(foo #), tokenize("foo\n# Comment")
|
assert_equal %w(foo), tokenize("foo\n# Comment")
|
||||||
assert_equal %w(foo # bar), tokenize("foo\n# Comment\nbar")
|
assert_equal %w(foo bar), tokenize("foo\n# Comment\nbar")
|
||||||
assert_equal %w(foo //), tokenize("foo\n// Comment")
|
assert_equal %w(foo), tokenize("foo\n// Comment")
|
||||||
assert_equal %w(foo /* */), tokenize("foo /* Comment */")
|
assert_equal %w(foo), tokenize("foo /* Comment */")
|
||||||
assert_equal %w(foo /* */), tokenize("foo /* \nComment\n */")
|
assert_equal %w(foo), tokenize("foo /* \nComment\n */")
|
||||||
assert_equal %w(foo <!-- -->), tokenize("foo <!-- Comment -->")
|
assert_equal %w(foo), tokenize("foo <!-- Comment -->")
|
||||||
assert_equal %w(foo {- -}), tokenize("foo {- Comment -}")
|
assert_equal %w(foo), tokenize("foo {- Comment -}")
|
||||||
assert_equal %w(foo \(* *\)), tokenize("foo (* Comment *)")
|
assert_equal %w(foo), tokenize("foo (* Comment *)")
|
||||||
assert_equal %w(% %), tokenize("2 % 10\n% Comment")
|
assert_equal %w(%), tokenize("2 % 10\n% Comment")
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_sgml_tags
|
def test_sgml_tags
|
||||||
|
|||||||
Reference in New Issue
Block a user