mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
@@ -1,3 +1,5 @@
|
||||
require 'strscan'
|
||||
|
||||
module Linguist
|
||||
# Generic programming language tokenizer.
|
||||
#
|
||||
@@ -50,8 +52,13 @@ module Linguist
|
||||
|
||||
tokens = []
|
||||
until s.eos?
|
||||
if token = s.scan(/^#!.+$/)
|
||||
if name = extract_shebang(token)
|
||||
tokens << "SHEBANG#!#{name}"
|
||||
end
|
||||
|
||||
# Single line comment
|
||||
if token = s.scan(START_SINGLE_LINE_COMMENT)
|
||||
elsif token = s.scan(START_SINGLE_LINE_COMMENT)
|
||||
tokens << token.strip
|
||||
s.skip_until(/\n|\Z/)
|
||||
|
||||
@@ -103,6 +110,33 @@ module Linguist
|
||||
tokens
|
||||
end
|
||||
|
||||
# Internal: Extract normalized shebang command token.
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# extract_shebang("#!/usr/bin/ruby")
|
||||
# # => "ruby"
|
||||
#
|
||||
# extract_shebang("#!/usr/bin/env node")
|
||||
# # => "node"
|
||||
#
|
||||
# Returns String token or nil if it couldn't be parsed.
|
||||
def extract_shebang(data)
  s = StringScanner.new(data)

  # Without a "#!<path>" at the start of data there is nothing to extract.
  path = s.scan(/^#!\s*\S+/)
  return nil unless path

  # Keep only the last path component, e.g. "ruby" from "#!/usr/bin/ruby".
  script = path.split('/').last

  # "env" is only a launcher; the interpreter is its first argument.
  if script == 'env'
    s.scan(/\s+/)
    script = s.scan(/\S+/)
  end

  # script is nil when "env" has no argument ("#!/usr/bin/env"); return nil
  # in that case instead of raising NoMethodError on nil.
  # Otherwise keep the first run of non-digit characters so version
  # suffixes are dropped ("python2.7" => "python").
  script && script[/[^\d]+/, 0]
end
|
||||
|
||||
# Internal: Extract tokens from inside SGML tag.
|
||||
#
|
||||
# data - SGML tag String.
|
||||
|
||||
Reference in New Issue
Block a user