mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-07 20:08:48 +00:00
Extract single and multi line comment parser
This commit is contained in:
@@ -24,6 +24,27 @@ module Linguist
|
||||
extract_tokens(data)
|
||||
end
|
||||
|
||||
# Markers that open a single line comment. START_SINGLE_LINE_COMMENT is
# compiled from this list; the trailing note on each entry names one
# language that uses the marker.
SINGLE_LINE_COMMENTS = [
|
||||
'//', # C
|
||||
'#', # Ruby
|
||||
'%', # Tex
|
||||
]
|
||||
|
||||
# [open, close] delimiter pairs for multi line comments. The opening
# delimiter doubles as the lookup key: the tokenizer finds the matching
# closer with MULTI_LINE_COMMENTS.assoc(open)[1].
MULTI_LINE_COMMENTS = [
|
||||
['/*', '*/'], # C
|
||||
['<!--', '-->'], # XML
|
||||
['{-', '-}'], # Haskell
|
||||
['(*', '*)'] # Coq
|
||||
]
|
||||
|
||||
# Regexp matching the start of a single line comment: optional leading
# whitespace, one of the SINGLE_LINE_COMMENTS markers, then one space.
#
# The prefix is deliberately a single-quoted string. Inside a double-quoted
# Ruby string "\s" is the escape for a literal space character, so the
# previous "^\s*" compiled to /^ */ and comment markers indented with tabs
# were never recognized. Single quotes keep the backslash, so the compiled
# pattern really contains the \s whitespace class.
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
  '^\s*' + Regexp.escape(c) + ' '
}.join("|"))
|
||||
|
||||
# Regexp matching any multi line comment opener: an alternation of the
# escaped first element (the opening delimiter) of every
# MULTI_LINE_COMMENTS pair.
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
|
||||
Regexp.escape(c[0])
|
||||
}.join("|"))
|
||||
|
||||
# Internal: Extract generic tokens from data.
|
||||
#
|
||||
# data - String to scan.
|
||||
@@ -39,44 +60,17 @@ module Linguist
|
||||
|
||||
tokens = []
|
||||
until s.eos?
|
||||
# Ruby single line comment
|
||||
if token = s.scan(/# /)
|
||||
tokens << "#"
|
||||
# Single line comment
|
||||
if token = s.scan(START_SINGLE_LINE_COMMENT)
|
||||
tokens << token.strip
|
||||
s.skip_until(/\n|\Z/)
|
||||
|
||||
# C style single line comment
|
||||
elsif token = s.scan(/\/\/ /)
|
||||
tokens << "//"
|
||||
s.skip_until(/\n|\Z/)
|
||||
|
||||
# Leading Tex or Matlab comments
|
||||
elsif token = s.scan(/\n%/)
|
||||
tokens << "%"
|
||||
s.skip_until(/\n|\Z/)
|
||||
|
||||
# C multiline comments
|
||||
elsif token = s.scan(/\/\*/)
|
||||
tokens << "/*"
|
||||
s.skip_until(/\*\//)
|
||||
tokens << "*/"
|
||||
|
||||
# Haskell multiline comments
|
||||
elsif token = s.scan(/\{-/)
|
||||
tokens << "{-"
|
||||
s.skip_until(/-\}/)
|
||||
tokens << "-}"
|
||||
|
||||
# XML multiline comments
|
||||
elsif token = s.scan(/<!--/)
|
||||
tokens << "<!--"
|
||||
s.skip_until(/-->/)
|
||||
tokens << "-->"
|
||||
|
||||
# Coq multiline comments
|
||||
elsif token = s.scan(/\(\*/)
|
||||
tokens << "(*"
|
||||
s.skip_until(/\*\)/)
|
||||
tokens << "*)"
|
||||
# Multiline comments
|
||||
elsif token = s.scan(START_MULTI_LINE_COMMENT)
|
||||
tokens << token
|
||||
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
|
||||
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
|
||||
tokens << close_token
|
||||
|
||||
# Skip single or double quoted strings
|
||||
elsif s.scan(/"/)
|
||||
|
||||
Reference in New Issue
Block a user