mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Improve operator tokenizing
This commit is contained in:
		@@ -74,18 +74,22 @@ module Linguist
 | 
				
			|||||||
          s.skip_until(/[^\\]'/)
 | 
					          s.skip_until(/[^\\]'/)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Skip number literals
 | 
					        # Skip number literals
 | 
				
			||||||
        elsif s.scan(/\d+/)
 | 
					        elsif s.scan(/(0x)?\d+/)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # SGML style brackets
 | 
					        # SGML style brackets
 | 
				
			||||||
        elsif token = s.scan(/<[^>]+>/)
 | 
					        elsif token = s.scan(/<[^\s<>][^<>]*>/)
 | 
				
			||||||
          extract_sgml_tokens(token).each { |t| tokens << t }
 | 
					          extract_sgml_tokens(token).each { |t| tokens << t }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Common programming punctuation
 | 
					        # Common programming punctuation
 | 
				
			||||||
        elsif token = s.scan(/;|\{|\}|\(|\)|<<?|\+/)
 | 
					        elsif token = s.scan(/;|\{|\}|\(|\)/)
 | 
				
			||||||
          tokens << token
 | 
					          tokens << token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Regular token
 | 
					        # Regular token
 | 
				
			||||||
        elsif token = s.scan(/[\w\.@#\/]+/)
 | 
					        elsif token = s.scan(/[\w\.@#\/\*]+/)
 | 
				
			||||||
 | 
					          tokens << token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Common operators
 | 
				
			||||||
 | 
					        elsif token = s.scan(/<<?|\+|\-|\*|\/|&&?|\|\|?/)
 | 
				
			||||||
          tokens << token
 | 
					          tokens << token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        else
 | 
					        else
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -25,6 +25,7 @@ class TestTokenizer < Test::Unit::TestCase
 | 
				
			|||||||
  def test_skip_number_literals
 | 
					  def test_skip_number_literals
 | 
				
			||||||
    assert_equal %w(+), tokenize('1 + 1')
 | 
					    assert_equal %w(+), tokenize('1 + 1')
 | 
				
			||||||
    assert_equal %w(add \( \)), tokenize('add(123, 456)')
 | 
					    assert_equal %w(add \( \)), tokenize('add(123, 456)')
 | 
				
			||||||
 | 
					    assert_equal %w(|), tokenize('0x01 | 0x10')
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_skip_comments
 | 
					  def test_skip_comments
 | 
				
			||||||
@@ -47,20 +48,33 @@ class TestTokenizer < Test::Unit::TestCase
 | 
				
			|||||||
    assert_equal %w(<?xml> version=), tokenize("<?xml version=\"1.0\"?>")
 | 
					    assert_equal %w(<?xml> version=), tokenize("<?xml version=\"1.0\"?>")
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def test_operators
 | 
				
			||||||
 | 
					    assert_equal %w(+), tokenize("1 + 1")
 | 
				
			||||||
 | 
					    assert_equal %w(-), tokenize("1 - 1")
 | 
				
			||||||
 | 
					    assert_equal %w(*), tokenize("1 * 1")
 | 
				
			||||||
 | 
					    assert_equal %w(/), tokenize("1 / 1")
 | 
				
			||||||
 | 
					    assert_equal %w(&), tokenize("1 & 1")
 | 
				
			||||||
 | 
					    assert_equal %w(&&), tokenize("1 && 1")
 | 
				
			||||||
 | 
					    assert_equal %w(|), tokenize("1 | 1")
 | 
				
			||||||
 | 
					    assert_equal %w(||), tokenize("1 || 1")
 | 
				
			||||||
 | 
					    assert_equal %w(<), tokenize("1 < 0x01")
 | 
				
			||||||
 | 
					    assert_equal %w(<<), tokenize("1 << 0x01")
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_c_tokens
 | 
					  def test_c_tokens
 | 
				
			||||||
    assert_equal %w(#ifndef HELLO_H #define HELLO_H void hello \( \) ; #endif), tokenize(:"c/hello.h")
 | 
					    assert_equal %w(#ifndef HELLO_H #define HELLO_H void hello \( \) ; #endif), tokenize(:"c/hello.h")
 | 
				
			||||||
    assert_equal %w(#include <stdio.h> int main \( \) { printf \( \) ; return ; }), tokenize(:"c/hello.c")
 | 
					    assert_equal %w(#include <stdio.h> int main \( \) { printf \( \) ; return ; }), tokenize(:"c/hello.c")
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_cpp_tokens
 | 
					  def test_cpp_tokens
 | 
				
			||||||
    assert_equal %w(class Bar { protected char name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h")
 | 
					    assert_equal %w(class Bar { protected char *name ; public void hello \( \) ; }), tokenize(:"cpp/bar.h")
 | 
				
			||||||
    assert_equal %w(#include <iostream> using namespace std ; int main \( \) { cout << << endl ; }), tokenize(:"cpp/hello.cpp")
 | 
					    assert_equal %w(#include <iostream> using namespace std ; int main \( \) { cout << << endl ; }), tokenize(:"cpp/hello.cpp")
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_objective_c_tokens
 | 
					  def test_objective_c_tokens
 | 
				
			||||||
    assert_equal %w(#import <Foundation/Foundation.h> @interface Foo NSObject { } @end), tokenize(:"objective-c/Foo.h")
 | 
					    assert_equal %w(#import <Foundation/Foundation.h> @interface Foo NSObject { } @end), tokenize(:"objective-c/Foo.h")
 | 
				
			||||||
    assert_equal %w(#import @implementation Foo @end), tokenize(:"objective-c/Foo.m")
 | 
					    assert_equal %w(#import @implementation Foo @end), tokenize(:"objective-c/Foo.m")
 | 
				
			||||||
    assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m")
 | 
					    assert_equal %w(#import <Cocoa/Cocoa.h> int main \( int argc char *argv \) { NSLog \( @ \) ; return ; }), tokenize(:"objective-c/hello.m")
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def test_javascript_tokens
 | 
					  def test_javascript_tokens
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user