mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 01:30:22 +00:00
* Removing FORTRAN samples because OS X case-insensitive filesystems :-\ * Adding Fotran samples back * FORTRAN -> Fortran * Groff -> Roff * GAS -> Unix Assembly * Cucumber -> Gherkin * Nimrod -> Nim * Ragel in Ruby Host -> Ragel * Jade -> Pug * VimL -> Vim script
74 lines
1.4 KiB
Ragel
74 lines
1.4 KiB
Ragel
=begin
%%{
  # Ragel specification. It sits between =begin and =end so that the
  # spec is an ordinary block comment as far as Ruby is concerned.
  machine simple_tokenizer;

  # Record the position of the first payload byte.
  action MyTs { my_ts = p }

  # Record the position where a candidate ENDFOO terminator begins.
  action MyTe { my_te = p }

  # Hand the bytes between the two recorded positions to the host
  # emit method, then forget both positions.
  action Emit {
    emit data[my_ts...my_te].pack('c*')
    my_ts = my_te = nil
  }

  # A block is STARTFOO, one or more payload bytes, then ENDFOO.
  #   >MyTs  runs on entering the payload
  #   :>>    stops the payload once the ENDFOO terminator has fully matched
  #   >MyTe  runs on entering the terminator
  #   %Emit  runs on leaving the terminator
  foo_block = 'STARTFOO' any+ >MyTs :>> 'ENDFOO' >MyTe %Emit;

  # The input is any mix of blocks and other bytes.
  main := ( foo_block | any+ )*;
}%%
=end
|
|
|
|
# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents.
|
|
#
|
|
# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage.
|
|
#
|
|
# Does not use ragel's scanner functionality because no backtracking is needed.
|
|
class SimpleTokenizer
|
|
attr_reader :path
|
|
|
|
def initialize(path)
|
|
@path = path
|
|
%% write data;
|
|
# % (this fixes syntax highlighting)
|
|
end
|
|
|
|
def emit(foo)
|
|
$stdout.puts foo
|
|
end
|
|
|
|
def perform
|
|
# So that ragel doesn't try to get it from data.length
|
|
pe = :ignored
|
|
eof = :ignored
|
|
|
|
%% write init;
|
|
# % (this fixes syntax highlighting)
|
|
|
|
leftover = []
|
|
my_ts = nil
|
|
my_te = nil
|
|
|
|
File.open(path) do |f|
|
|
while chunk = f.read(ENV['CHUNK_SIZE'].to_i)
|
|
data = leftover + chunk.unpack('c*')
|
|
p = 0
|
|
pe = data.length
|
|
%% write exec;
|
|
# % (this fixes syntax highlighting)
|
|
if my_ts
|
|
leftover = data[my_ts..-1]
|
|
my_te = my_te - my_ts if my_te
|
|
my_ts = 0
|
|
else
|
|
leftover = []
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
# Entry point: scan the file named by the first command-line argument.
SimpleTokenizer.new(ARGV.first).perform
|