diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb index 0123f385..64a82e67 100644 --- a/lib/linguist/tokenizer.rb +++ b/lib/linguist/tokenizer.rb @@ -96,7 +96,7 @@ module Linguist end # Skip number literals - elsif s.scan(/(0x)?\d(\d|\.)*/) + elsif s.scan(/(0x\h(\h|\.)*|\d(\d|\.)*)([uU][lL]{0,2}|([eE][-+]\d*)?[fFlL]*)/) # SGML style brackets elsif token = s.scan(/<[^\s<>][^<>]*>/) diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb index 75f854a9..b8b486cb 100644 --- a/test/test_tokenizer.rb +++ b/test/test_tokenizer.rb @@ -25,6 +25,10 @@ class TestTokenizer < Minitest::Test assert_equal %w(add \( \)), tokenize('add(123, 456)') assert_equal %w(|), tokenize('0x01 | 0x10') assert_equal %w(*), tokenize('500.42 * 1.0') + assert_equal %w(), tokenize('1.23e-04') + assert_equal %w(), tokenize('1.0f') + assert_equal %w(), tokenize('1234ULL') + assert_equal %w(G1 X55 Y5 F2000), tokenize('G1 X55 Y5 F2000') end def test_skip_comments