mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Improve operator tokenizing
This commit is contained in:
@@ -74,18 +74,22 @@ module Linguist
|
||||
s.skip_until(/[^\\]'/)
|
||||
|
||||
# Skip number literals
|
||||
elsif s.scan(/\d+/)
|
||||
elsif s.scan(/(0x)?\d+/)
|
||||
|
||||
# SGML style brackets
|
||||
elsif token = s.scan(/<[^>]+>/)
|
||||
elsif token = s.scan(/<[^\s<>][^<>]*>/)
|
||||
extract_sgml_tokens(token).each { |t| tokens << t }
|
||||
|
||||
# Common programming punctuation
|
||||
elsif token = s.scan(/;|\{|\}|\(|\)|<<?|\+/)
|
||||
elsif token = s.scan(/;|\{|\}|\(|\)/)
|
||||
tokens << token
|
||||
|
||||
# Regular token
|
||||
elsif token = s.scan(/[\w\.@#\/]+/)
|
||||
elsif token = s.scan(/[\w\.@#\/\*]+/)
|
||||
tokens << token
|
||||
|
||||
# Common operators
|
||||
elsif token = s.scan(/<<?|\+|\-|\*|\/|&&?|\|\|?/)
|
||||
tokens << token
|
||||
|
||||
else
|
||||
|
||||
@@ -25,6 +25,7 @@ class TestTokenizer < Test::Unit::TestCase
|
||||
# Asserts that number literals never show up in the token list: for each
# input below, the expected tokens contain no numeric operand.
def test_skip_number_literals
|
||||
assert_equal %w(+), tokenize('1 + 1')
|
||||
assert_equal %w(add \( \)), tokenize('add(123, 456)')
|
||||
# 0x-prefixed literals are expected to be dropped just like plain digits.
assert_equal %w(|), tokenize('0x01 | 0x10')
|
||||
end
|
||||
|
||||
def test_skip_comments
|
||||
@@ -47,20 +48,33 @@ class TestTokenizer < Test::Unit::TestCase
|
||||
assert_equal %w(<?xml> version=), tokenize("<?xml version=\"1.0\"?>")
|
||||
end
|
||||
|
||||
# Asserts that each common operator is returned as a single token while the
# numeric operands on either side (decimal or 0x-prefixed) are dropped.
# Single and doubled forms (& / &&, | / ||, < / <<) must stay distinct.
def test_operators
|
||||
assert_equal %w(+), tokenize("1 + 1")
|
||||
assert_equal %w(-), tokenize("1 - 1")
|
||||
assert_equal %w(*), tokenize("1 * 1")
|
||||
assert_equal %w(/), tokenize("1 / 1")
|
||||
assert_equal %w(&), tokenize("1 & 1")
|
||||
assert_equal %w(&&), tokenize("1 && 1")
|
||||
assert_equal %w(|), tokenize("1 | 1")
|
||||
assert_equal %w(||), tokenize("1 || 1")
|
||||
assert_equal %w(<), tokenize("1 < 0x01")
|
||||
assert_equal %w(<<), tokenize("1 << 0x01")
|
||||
end
|
||||
|
||||
# Compares the token lists produced for two C inputs against fixed
# expectations.
# NOTE(review): the Symbol arguments presumably name sample files that the
# `tokenize` test helper loads; the helper is not visible here - confirm.
def test_c_tokens
|
||||
assert_equal %w(#ifndef HELLO_H #define HELLO_H void hello \( \) ; #endif), tokenize(:"c/hello.h")
|
||||
assert_equal %w(#include <stdio.h> int main \( \) { printf \( \) ; return ; }), tokenize(:"c/hello.c")
|
||||
end
|
||||
|
||||
# Compares the token lists produced for two C++ inputs against fixed
# expectations.
# NOTE(review): the first two assertions target the same input
# (:"cpp/bar.h") but expect `name` vs `*name`; they cannot both pass. They
# look like the before/after versions of one line from a diff - presumably
# only the `*name` variant is meant to remain; confirm.
def test_cpp_tokens
|
||||
assert_equal %w(class Bar { protected char name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h")
|
||||
assert_equal %w(class Bar { protected char *name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h")
|
||||
# The `<<` operator is expected twice, as two separate tokens.
assert_equal %w(#include <iostream> using namespace std ; int main \( \) { cout << << endl ; }), tokenize(:"cpp/hello.cpp")
|
||||
end
|
||||
|
||||
# Compares the token lists produced for three Objective-C inputs against
# fixed expectations.
# NOTE(review): the last two assertions target the same input
# (:"objective-c/hello.m") but expect `argv` vs `*argv`; they cannot both
# pass. They look like the before/after versions of one line from a diff -
# presumably only the `*argv` variant is meant to remain; confirm.
def test_objective_c_tokens
|
||||
assert_equal %w(#import <Foundation/Foundation.h> @interface Foo NSObject { } @end), tokenize(:"objective-c/Foo.h")
|
||||
assert_equal %w(#import @implementation Foo @end), tokenize(:"objective-c/Foo.m")
|
||||
assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m")
|
||||
assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char *argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m")
|
||||
end
|
||||
|
||||
def test_javascript_tokens
|
||||
|
||||
Reference in New Issue
Block a user