Files
linguist/samples/Ragel/simple_tokenizer.rl
Arfon Smith d8b91bd5c4 The grand language renaming bonanza (#3278)
* Removing FORTRAN samples because OS X case-insensitive filesystems :-\

* Adding Fortran samples back

* FORTRAN -> Fortran

* Groff -> Roff

* GAS -> Unix Assembly

* Cucumber -> Gherkin

* Nimrod -> Nim

* Ragel in Ruby Host -> Ragel

* Jade -> Pug

* VimL -> Vim script
2016-12-13 13:39:27 -08:00

74 lines
1.4 KiB
Ragel

=begin
%%{
machine simple_tokenizer;
# The action bodies below are host (Ruby) code. They read and write the
# locals p, data, my_ts and my_te that the perform method sets up.
# MyTs: remember the current position as the start of a block's contents.
action MyTs {
my_ts = p
}
# MyTe: remember the current position as the end of a block's contents.
action MyTe {
my_te = p
}
# Emit: hand the bytes between the two marks to emit, then clear both marks.
action Emit {
emit data[my_ts...my_te].pack('c*')
my_ts = nil
my_te = nil
}
# A block is STARTFOO, one or more arbitrary characters, then ENDFOO.
# MyTs is an entering action on the contents, MyTe is an entering action on
# ENDFOO, and Emit is a leaving action on the completed block. The :>>
# operator is finish-guarded concatenation: the contents stop matching once
# ENDFOO has been seen in full.
foo = 'STARTFOO' any+ >MyTs :>> 'ENDFOO' >MyTe %Emit;
# The input as a whole is any mix of blocks and other characters.
main := ( foo | any+ )*;
}%%
=end
# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents.
#
# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage.
#
# Does not use ragel's scanner functionality because no backtracking is needed.
class SimpleTokenizer
attr_reader :path
def initialize(path)
@path = path
%% write data;
# % (this fixes syntax highlighting)
end
def emit(foo)
$stdout.puts foo
end
def perform
# So that ragel doesn't try to get it from data.length
pe = :ignored
eof = :ignored
%% write init;
# % (this fixes syntax highlighting)
leftover = []
my_ts = nil
my_te = nil
File.open(path) do |f|
while chunk = f.read(ENV['CHUNK_SIZE'].to_i)
data = leftover + chunk.unpack('c*')
p = 0
pe = data.length
%% write exec;
# % (this fixes syntax highlighting)
if my_ts
leftover = data[my_ts..-1]
my_te = my_te - my_ts if my_te
my_ts = 0
else
leftover = []
end
end
end
end
end
# Run the tokenizer over the file named by the first command-line argument.
SimpleTokenizer.new(ARGV[0]).perform