mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			74 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Ragel
		
	
	
	
	
	
			
		
		
	
	
			74 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Ragel
		
	
	
	
	
	
=begin
 | 
						|
%%{
 | 
						|
  machine simple_tokenizer;
 | 
						|
 | 
						|
  action MyTs {
 | 
						|
    my_ts = p
 | 
						|
  }
 | 
						|
  action MyTe {
 | 
						|
    my_te = p
 | 
						|
  }
 | 
						|
  action Emit {
 | 
						|
    emit data[my_ts...my_te].pack('c*')
 | 
						|
    my_ts = nil
 | 
						|
    my_te = nil    
 | 
						|
  }
 | 
						|
 | 
						|
  foo = 'STARTFOO' any+ >MyTs :>> 'ENDFOO' >MyTe %Emit;
 | 
						|
  main := ( foo | any+ )*;
 | 
						|
 | 
						|
}%%
 | 
						|
=end
 | 
						|
 | 
						|
# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents.
#
# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage.
#
# Does not use Ragel's scanner functionality because no backtracking is needed.
 | 
						|
class SimpleTokenizer
 | 
						|
  attr_reader :path
 | 
						|
 | 
						|
  def initialize(path)
 | 
						|
    @path = path
 | 
						|
    %% write data;
 | 
						|
    # % (this fixes syntax highlighting)
 | 
						|
  end
 | 
						|
 | 
						|
  def emit(foo)
 | 
						|
    $stdout.puts foo
 | 
						|
  end
 | 
						|
 | 
						|
  def perform
 | 
						|
    # So that ragel doesn't try to get it from data.length
 | 
						|
    pe = :ignored
 | 
						|
    eof = :ignored
 | 
						|
 | 
						|
    %% write init;
 | 
						|
    # % (this fixes syntax highlighting)
 | 
						|
 | 
						|
    leftover = []
 | 
						|
    my_ts = nil
 | 
						|
    my_te = nil
 | 
						|
    
 | 
						|
    File.open(path) do |f|
 | 
						|
      while chunk = f.read(ENV['CHUNK_SIZE'].to_i)
 | 
						|
        data = leftover + chunk.unpack('c*')
 | 
						|
        p = 0
 | 
						|
        pe = data.length
 | 
						|
        %% write exec;
 | 
						|
        # % (this fixes syntax highlighting)
 | 
						|
        if my_ts
 | 
						|
          leftover = data[my_ts..-1]
 | 
						|
          my_te = my_te - my_ts if my_te
 | 
						|
          my_ts = 0
 | 
						|
        else
 | 
						|
          leftover = []
 | 
						|
        end
 | 
						|
      end
 | 
						|
    end
 | 
						|
  end
 | 
						|
end
 | 
						|
 | 
						|
# Entry point: tokenize the file named by the first command-line argument.
SimpleTokenizer.new(ARGV.first).perform
 |