Use negative lookbehind when tokenizing string literals

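The `skip_until` patterns for double- and single-quoted strings now use the
zero-width negative lookbehind `(?<!\\)` instead of the character class `[^\\]`.
This can double the speed of tokenizing large RTF files that use \'hh
escape sequences.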
This commit is contained in:
Adam Roben
2015-11-05 10:18:44 -05:00
parent 362d300cb0
commit fea8bb21a0

View File

@@ -86,13 +86,13 @@ module Linguist
if s.peek(1) == "\"" if s.peek(1) == "\""
s.getch s.getch
else else
s.skip_until(/[^\\]"/) s.skip_until(/(?<!\\)"/)
end end
elsif s.scan(/'/) elsif s.scan(/'/)
if s.peek(1) == "'" if s.peek(1) == "'"
s.getch s.getch
else else
s.skip_until(/[^\\]'/) s.skip_until(/(?<!\\)'/)
end end
# Skip number literals # Skip number literals