diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb index 4e641667..9c6779af 100644 --- a/lib/linguist/tokenizer.rb +++ b/lib/linguist/tokenizer.rb @@ -73,12 +73,15 @@ module Linguist elsif s.scan(/'/) s.skip_until(/[^\\]'/) + # Skip number literals + elsif s.scan(/\d+/) + # SGML style brackets elsif token = s.scan(/<[^>]+>/) extract_sgml_tokens(token).each { |t| tokens << t } # Common programming punctuation - elsif token = s.scan(/;|\{|\}|\(|\)|< int main \( \) { printf \( \) ; return 0 ; }), tokenize(:"c/hello.c") + assert_equal %w(#ifndef HELLO_H #define HELLO_H void hello \( \) ; #endif), tokenize(:"c/hello.h") + assert_equal %w(#include int main \( \) { printf \( \) ; return ; }), tokenize(:"c/hello.c") end def test_cpp_tokens @@ -54,7 +60,7 @@ class TestTokenizer < Test::Unit::TestCase def test_objective_c_tokens assert_equal %w(#import @interface Foo NSObject { } @end), tokenize(:"objective-c/Foo.h") assert_equal %w(#import @implementation Foo @end), tokenize(:"objective-c/Foo.m") - assert_equal %w(#import int main \( int argc char argv \) { NSLog \( @ \) ; return 0 ; }), tokenize(:"objective-c/hello.m") + assert_equal %w(#import int main \( int argc char argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m") end def test_javascript_tokens