mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Improve operator tokenizing
This commit is contained in:
		| @@ -74,18 +74,22 @@ module Linguist | ||||
|           s.skip_until(/[^\\]'/) | ||||
|  | ||||
|         # Skip number literals | ||||
|         elsif s.scan(/\d+/) | ||||
|         elsif s.scan(/(0x)?\d+/) | ||||
|  | ||||
|         # SGML style brackets | ||||
|         elsif token = s.scan(/<[^>]+>/) | ||||
|         elsif token = s.scan(/<[^\s<>][^<>]*>/) | ||||
|           extract_sgml_tokens(token).each { |t| tokens << t } | ||||
|  | ||||
|         # Common programming punctuation | ||||
|         elsif token = s.scan(/;|\{|\}|\(|\)|<<?|\+/) | ||||
|         elsif token = s.scan(/;|\{|\}|\(|\)/) | ||||
|           tokens << token | ||||
|  | ||||
|         # Regular token | ||||
|         elsif token = s.scan(/[\w\.@#\/]+/) | ||||
|         elsif token = s.scan(/[\w\.@#\/\*]+/) | ||||
|           tokens << token | ||||
|  | ||||
|         # Common operators | ||||
|         elsif token = s.scan(/<<?|\+|\-|\*|\/|&&?|\|\|?/) | ||||
|           tokens << token | ||||
|  | ||||
|         else | ||||
|   | ||||
| @@ -25,6 +25,7 @@ class TestTokenizer < Test::Unit::TestCase | ||||
|   def test_skip_number_literals | ||||
|     assert_equal %w(+), tokenize('1 + 1') | ||||
|     assert_equal %w(add \( \)), tokenize('add(123, 456)') | ||||
|     assert_equal %w(|), tokenize('0x01 | 0x10') | ||||
|   end | ||||
|  | ||||
|   def test_skip_comments | ||||
| @@ -47,20 +48,33 @@ class TestTokenizer < Test::Unit::TestCase | ||||
|     assert_equal %w(<?xml> version=), tokenize("<?xml version=\"1.0\"?>") | ||||
|   end | ||||
|  | ||||
|   def test_operators | ||||
|     assert_equal %w(+), tokenize("1 + 1") | ||||
|     assert_equal %w(-), tokenize("1 - 1") | ||||
|     assert_equal %w(*), tokenize("1 * 1") | ||||
|     assert_equal %w(/), tokenize("1 / 1") | ||||
|     assert_equal %w(&), tokenize("1 & 1") | ||||
|     assert_equal %w(&&), tokenize("1 && 1") | ||||
|     assert_equal %w(|), tokenize("1 | 1") | ||||
|     assert_equal %w(||), tokenize("1 || 1") | ||||
|     assert_equal %w(<), tokenize("1 < 0x01") | ||||
|     assert_equal %w(<<), tokenize("1 << 0x01") | ||||
|   end | ||||
|  | ||||
|   def test_c_tokens | ||||
|     assert_equal %w(#ifndef HELLO_H #define HELLO_H void hello \( \) ; #endif), tokenize(:"c/hello.h") | ||||
|     assert_equal %w(#include <stdio.h> int main \( \) { printf \( \) ; return ; }), tokenize(:"c/hello.c") | ||||
|   end | ||||
|  | ||||
|   def test_cpp_tokens | ||||
|     assert_equal %w(class Bar { protected char name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h") | ||||
|     assert_equal %w(class Bar { protected char *name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h") | ||||
|     assert_equal %w(#include <iostream> using namespace std ; int main \( \) { cout << << endl ; }), tokenize(:"cpp/hello.cpp") | ||||
|   end | ||||
|  | ||||
|   def test_objective_c_tokens | ||||
|     assert_equal %w(#import <Foundation/Foundation.h> @interface Foo NSObject { } @end), tokenize(:"objective-c/Foo.h") | ||||
|     assert_equal %w(#import @implementation Foo @end), tokenize(:"objective-c/Foo.m") | ||||
|     assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m") | ||||
|     assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char *argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m") | ||||
|   end | ||||
|  | ||||
|   def test_javascript_tokens | ||||
|   | ||||
		Reference in New Issue
	
	Block a user