Slimming down Terra samples

This commit is contained in:
Arfon Smith
2016-03-10 06:50:48 -06:00
parent 2e9d8f5520
commit 6812a22706
366 changed files with 0 additions and 11582 deletions

View File

@@ -1,22 +0,0 @@
-- Regression test for the __getentries metamethod: the struct is declared with
-- fields a/b, but the terra code below accesses them as fooa/foob.
struct A {
a : int;
b : int;
}
-- Prefixes every entry's field name with "foo" (mutating self.entries in place)
-- and returns that same list.
-- NOTE(review): a second invocation would prefix the names again; presumably
-- the hook runs only once per type -- confirm.
function A.metamethods.__getentries(self)
print("GET ENTRIES")
for i,e in ipairs(self.entries) do
e.field = "foo"..e.field
end
return self.entries
end
terra foo()
var a : A
a.fooa = 3
a.foob = 4
return a.fooa + a.foob
end
assert(foo() == 7)

View File

@@ -1,17 +0,0 @@
-- Exercises the `add ... end` expression form that is used after
-- import "lib/addlanguage", including nesting, in two separate do-blocks.
do
import "lib/addlanguage"
local a = 4
-- operands: a Lua local, literals, arithmetic, and an immediately-invoked
-- Lua function that defines and calls a terra function
local b = add a,3,4+5,(function() terra one() return a end return one() end)() end
-- an add expression used as the first operand of another add expression
local inception = add add 3,4,a end,4 end
local test = require("test")
test.eq(b,20)
test.eq(inception,15)
end
-- second block imports the language again before using `add`
do
import "lib/addlanguage"
local c = add 4,5 end
assert(9 == c)
end

View File

@@ -1,18 +0,0 @@
-- An int[4] value created from Lua is written from terra code, first through a
-- single element (foo) and then by whole-array assignment (bar); Lua reads the
-- results back after each call.
a = terralib.new(int[4])
terra foo()
a[3] = 4
end
foo()
assert(4 == a[3])
terra bar()
a = array(5,6,7,8)
end
bar()
assert(a[3] == 8)

View File

@@ -1,24 +0,0 @@
-- foo is ten printf calls and is flagged as inlined; bar calls it, and bar's
-- disassembly is printed so the generated code can be inspected by hand.
C = terralib.includec("stdio.h")
terra foo()
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
C.printf("hello, world\n")
end
-- setinlined(true) is applied to foo's first definition
foo:getdefinitions()[1]:setinlined(true)
terra bar()
foo()
return 4
end
-- bar is compiled and disassembled but never called here
bar:compile()
bar:disas()

View File

@@ -1,7 +0,0 @@
-- Loads from a+3 and stores to a, passing { align = alignment } to both
-- attrload and attrstore; the function is only disassembled, never called.
local alignment = 16
-- not referenced by the lines below
local aligned = terralib.aligned
terra foobar(a : &float)
terralib.attrstore(a,terralib.attrload(a+3,{ align = alignment }), { align = alignment })
end
foobar:disas()

View File

@@ -1,9 +0,0 @@
-- Checks `and` over two double comparisons: true only for a strictly
-- increasing triple a < b < c.
local test = require("test")
terra foo(a : double, b : double, c : double) : bool
return a < b and b < c
end
test.eq(foo(1,2,3),true)
test.eq(foo(1,2,1),false)
test.eq(foo(2,1,2),false)

View File

@@ -1,26 +0,0 @@
local test = require("test")
-- Type used for fib's parameter and return value.
local Num = int
-- Anonymous terra function: starting from c,p = 1,1 it performs `a` steps of
-- c,p = c+p,c and returns c.
local fib =
terra(a : Num) : Num
var i,c,p = 0,1,1
while i < a do
c,p = c + p,c
i = i + 1
end
return c
end
-- Plain-Lua counterpart of the terra `fib` defined above it in this script.
-- Starts from the pair (1,1), advances it `a` times and returns the leading
-- term; for a <= 0 no step is taken and 1 is returned.
function fib2(a)
    local current, previous = 1, 1
    local steps = 0
    while steps < a do
        local following = current + previous
        previous = current
        current = following
        steps = steps + 1
    end
    return current
end
-- Cross-check the terra and Lua implementations for inputs 0 through 10,
-- printing the terra result each time.
for i = 0,10 do
print(fib(i))
test.eq(fib(i),fib2(i))
end

View File

@@ -1,18 +0,0 @@
-- Imports C declarations containing nested anonymous structs and checks that
-- terra code can reach their fields through a pointer. The function is only
-- compiled, not called.
C,T = terralib.includecstring [[
struct D {
struct { int a; struct { int c; } c; } b;
} c;
typedef struct {int c;} B;
typedef struct {
struct { int a; } a;
struct { int b; } b;
B c;
} A;
]]
terra foo(a : &C.A)
-- c receives the whole first nested struct; b receives an int field
var c = a.a
var b = a.b.b
return c.a + b + a.c.c;
end
foo:compile()

View File

@@ -1,12 +0,0 @@
-- Nested tuple types: the type is spelled once via the Lua local C and once
-- inline, and a value of one is assigned to a variable of the other.
local C = tuple(int,tuple(int,int))
terra anon()
var c : tuple(int,tuple(int,int)) = { 1, {2,3} }
var d : C = c
d._0 = 2
-- expected 7 = d._0 (2) + c._1._0 (2) + c._1._1 (3)
return d._0 + c._1._0 + c._1._1
end
test = require("test")
test.eq(anon(),7)

View File

@@ -1,7 +0,0 @@
-- A struct containing a pointer to its own type is declared and used as an
-- (uninitialised) terra local.
struct A { a : &A }
terra foo()
var a : A
end
foo()

View File

@@ -1,12 +0,0 @@
-- Lua helper whose result is spliced into terra code below.
function omgfunc()
return 2
end
terra foo()
-- the call is spliced twice: once wrapped in a one-element Lua list and once
-- directly; the check below expects 1 + 2 + 2
return 1 + [{omgfunc()}] + [omgfunc()]
end
local test = require("test")
test.eq(foo(),5)

View File

@@ -1,13 +0,0 @@
-- omgfunc and `what` are defined but not referenced by the lines below.
function omgfunc()
return 4
end
local what = `[ {2,3} ]
terra foo()
-- escape of a list whose second element is itself a quoted list escape;
-- the check below expects the three values 3,2,3
return [{3,`[ {2,3} ]}]
end
local test = require("test")
test.meq({3,2,3},foo())

View File

@@ -1,16 +0,0 @@
-- omgfunc is not referenced by the lines below.
function omgfunc()
return 2
end
a = global(0)
terra foo()
-- one statement quote spliced on its own, then a list of two statement
-- quotes spliced together: a is incremented three times in total
[quote a = a + 1 end];
[{quote a = a + 1 end,quote a = a + 1 end}]
return a
end
local test = require("test")
test.eq(foo(),3)

View File

@@ -1,11 +0,0 @@
-- A terra local variable referenced through an escape, `[a]`; the function is
-- pretty-printed and then checked to return the variable's value.
terra foo()
var a = 4
return [a]
end
foo:printpretty()
local test = require("test")
test.eq(foo(),4)

View File

@@ -1,21 +0,0 @@
-- Wraps `body` (a statement quote) in a terra for-loop running from 0 to N and
-- hands back the resulting quote.
function makeloop(N,body)
    local looped = quote
        for i = 0, N do
            body
        end
    end
    return looped
end

-- Uses makeloop to repeat an increment of the local `a` and returns the total.
terra stuff()
    var a = 0;
    [makeloop(10,quote
        a = a + 1
    end)]
    return a
end

print(stuff())

View File

@@ -1,60 +0,0 @@
-- Array tests: returning arrays by value, indexing, pointer aliasing and
-- passing an array where a pointer parameter is expected.

-- Returns a two-element array by value.
terra bar()
var a : int[2]
a[0] = 1
a[1] = 2
return a
end
-- Multiple assignment into array elements.
terra foo()
var a : int[4]
a[1], a[2] = 4,2
return a[1] + a[2]
end
terra foo2()
var b = bar()
return b[0] + b[1]
end
-- Indexes the returned array directly, without a named temporary.
terra foo3()
return (bar())[0]
end
-- b points at a[0], so b[2] and b[3] address a[2] and a[3].
terra foo4()
var a : int[4]
a[3] = 7
var b = &a[0]
b[2] = 8
return b[2] + b[3]
end
-- Writes 100 through the pointer at index 1 and returns element 0.
terra bar2(a : &int)
a[1] = 100
return a[0]
end
-- The array local is passed where &int is expected; the write made by bar2 is
-- then read back from a[1].
terra foo5()
var a : int[4]
bar2(a)
return a[1]
end
-- The array returned by bar() is passed straight to bar2.
terra foo6()
return bar2(bar())
end
terra foo7()
var a = array(1,2,3,4)
return a[1]+a[2]
end
local test = require("test")
test.eq(6,foo())
test.eq(3,foo2())
test.eq(1,foo3())
test.eq(15,foo4())
test.eq(100,foo5())
test.eq(1,foo6())
test.eq(5,foo7())

View File

@@ -1,29 +0,0 @@
-- Tests of the array(...) constructor with integer, mixed numeric, macro-
-- expanded and string elements.
terra foo()
var a = array(1,2,3)
return a[0] + a[1] + a[2]
end
-- mixed int/float literals; the check below expects a[1] == 2.5
terra foo2()
var a = array(1,2.5,3)
return a[1]
end
terra what()
return 3,4.5
end
-- Macro returning a Lua list of two quotes (fields _0 and _1 of its argument).
local expand = macro(function(a) return {`a._0,`a._1} end)
terra foo3()
var w = what()
-- expand(w) contributes two elements, so index 3 is w._1
var a = array(1,2.5,expand(w))
return a[3]
end
terra foo4()
var a = array("what","magic","is","this")
-- second character of "magic"; the check below expects 97 ('a')
return a[1][1]
end
local test = require("test")
test.eq(foo(),6)
test.eq(foo2(),2.5)
test.eq(foo3(),4.5)
test.eq(foo4(),97)

View File

@@ -1,82 +0,0 @@
-- Platform guard: runs `uname` and returns from the script early, with a
-- warning, unless the output is exactly "Darwin\n".
local f = assert(io.popen("uname", 'r'))
local s = assert(f:read('*a'))
f:close()
if s~="Darwin\n" then
print("Warning, not running test b/c this isn't a mac")
return
end
C = terralib.includecstring [[
#include <stdio.h>
#include <stdlib.h>
]]
-- Cache of generated struct types, keyed by element type.
local arraytypes = {}
-- Returns a struct type holding a malloc'd buffer of T plus its length N.
-- The same struct is returned for repeated calls with the same T.
function Array(T)
if arraytypes[T] then return arraytypes[T] end
local struct ArrayImpl {
data : &T;
N : int;
}
-- registered before the methods are defined, so Array(T) can be requested
-- again while they are being built
arraytypes[T] = ArrayImpl
terra ArrayImpl:init(N : int)
self.data = [&T](C.malloc(N*sizeof(T)))
self.N = N
end
terra ArrayImpl:free()
C.free(self.data)
end
-- arr(idx) expands to an access of the idx-th element of the buffer
ArrayImpl.metamethods.__apply = macro(function(self,idx)
return `self.data[idx]
end)
-- Fallback for methods ArrayImpl does not define: builds elemfn, which
-- invokes `methodname` on a pointer to one element, and returns a quote that
-- fills a new Array of elemfn's return type with one result per element.
ArrayImpl.metamethods.__methodmissing = macro(function(methodname,selfexp,...)
local args = terralib.newlist {...}
-- one fresh symbol per argument, typed like that argument
local params = args:map(function(a) return symbol(a:gettype()) end)
local terra elemfn(a : &T, [params])
return a:[methodname](params)
end
local RT = elemfn:gettype().returntype
return quote
var self = selfexp
var r : Array(RT)
r:init(self.N)
for i = 0,r.N do
r.data[i] = elemfn(&self.data[i],args)
end
in
r
end
end)
return ArrayImpl
end
local OC = require("lib/objc")
local IO = terralib.includec("stdio.h")
-- Four doubles, passed below as the content-rect argument.
struct Rect {
a : double,
b : double,
c : double,
d : double
}
terra str(data : &uint8)
return OC.NSString:stringWithUTF8String(data)
end
terra main()
OC.NSAutoreleasePool:new()
var app = OC.NSApplication:sharedApplication()
var rec = Rect {0,0,200,200}
var windows : Array(OC.ID)
windows:init(2)
windows(0) = OC.NSWindow
windows(1) = OC.NSWindow
-- alloc, initWithContentRect_... and makeKeyAndOrderFront are not methods of
-- ArrayImpl; presumably they are routed through __methodmissing and applied
-- to each element -- confirm against lib/objc
windows = windows:alloc():initWithContentRect_styleMask_backing_defer(rec,1,2,false)
windows:makeKeyAndOrderFront(nil)
IO.printf("entering run loop\n")
app:run()
end
terralib.linklibrary("/System/Library/Frameworks/Cocoa.framework/Cocoa")
-- compiled only; main is not called from this script
main:compile()

View File

@@ -1,5 +0,0 @@
-- Two spellings of a type built from &, int and [4]. Only the final value of
-- `a` (the parenthesised form) is checked: it must be an array type and not a
-- pointer type.
a = &int[4]
a = (&int)[4]
local test = require("test")
test.eq(false,a:ispointer())
test.eq(true,a:isarray())

View File

@@ -1,28 +0,0 @@
-- Inline-assembly test. When started without arguments the script re-launches
-- itself through ../terra with -m and a dummy argument, then returns.
local a = ...
if not a then
--force MCJIT
os.execute("../terra -m asm.t true")
return
end
C = terralib.includec("stdio.h")
-- Receives the four outputs named in the cpuid constraint string below.
struct Vendor {
maxV : int;
b : int;
c : int;
d : int;
}
terra foo()
var r = terralib.asm(Vendor,"cpuid","={eax},={ebx},={ecx},={edx},{eax},~{dirflag},~{fpsr},~{flags}",false,0)
-- c and d are swapped before b, c, d are printed as a 12-character string
r.c,r.d = r.d,r.c
C.printf("%.12s\n", &r.b)
end
foo()
-- Adds 1 to its argument using a single addl instruction.
terra addone(a : int)
return terralib.asm(int,"addl $$1,$1","=r,0",true,a)
end
assert(addone(3) == 4)

View File

@@ -1,7 +0,0 @@
-- Calls C's atoi on a string literal from terra and prints the result.
c = terralib.includec("stdlib.h")
terra what()
return c.atoi("52")
end
print(what())

View File

@@ -1,32 +0,0 @@
-- Binds the LLVM intrinsic named in the string below as a terra function on
-- two 8-float vectors.
local haddavx = terralib.intrinsic("llvm.x86.avx.hadd.ps.256", { vector(float,8), vector(float,8) } -> vector(float,8))
-- Applies the intrinsic twice and adds lanes 0 and 4; the check at the bottom
-- expects the sum of all eight inputs (1..8 -> 36).
terra hadd(v : vector(float,8))
var v1 = haddavx(v,v)
var v2 = haddavx(v1,v1)
return v2[0] + v2[4]
end
ffi = require("ffi")
local stdio = terralib.includec("stdio.h")
-- Reinterprets the float pointer as a pointer to an 8-float vector and loads it.
terra foobar(a : &float)
return hadd(@[&vector(float,8)](a))
end
-- 8 floats allocated with a 32-byte alignment attribute, filled with 1..8
dat = ffi.new("float[?] __attribute__((aligned(32)))",8)
for i = 1,8 do
dat[i-1] = i
end
-- the test is skipped when terralib.llvmversion is 31
if terralib.llvmversion == 31 then
print("ignoring...")
else
foobar:compile()
local test = require("test")
test.eq(foobar(dat),36)
end
--terralib.saveobj("avxhadd",{main = foobar})
--os.execute("./avxhadd")

View File

@@ -1,4 +0,0 @@
--tostring on a type should never error, so we assign it its original name if it does
struct A {}
-- Lua's `error` is installed as the __typename hook, so the hook raises when called
A.metamethods.__typename = error
print(A)

View File

@@ -1,148 +0,0 @@
--[[
The Computer Language Benchmarks Game
http://shootout.alioth.debian.org/
contributed by Ledrug Katz
]]
-- C functions declared by hand with externfunction rather than via a header.
local C = {
printf = terralib.externfunction("printf", terralib.types.funcpointer(rawstring,int,true)),
exit = terralib.externfunction("exit", int -> {}),
atoi = terralib.externfunction("atoi", rawstring -> int)
}
-- this depends highly on the platform. It might be faster to use
-- char type on 32-bit systems; it might be faster to use unsigned.
elem = int
-- s and t are 16-element working arrays; the remaining globals are state
-- shared by flip, tk and doit
s = global(elem[16])
t = global(elem[16])
maxflips = global(int)
max_n = global(int)
odd = global(bool)
checksum = global(int)
-- Copies the first max_n elements of s into t, then repeatedly reverses the
-- prefix t[0 .. t[0]] until t[t[0]] == 0. Returns i, which starts at 1 and is
-- incremented after every reversal.
terra flip()
var i = max_n
var x : &elem = t
var y : &elem = s
var c : elem
-- copy s -> t
while i > 0 do
i = i - 1
@x = @y
x = x + 1
y = y + 1
end
i = 1
repeat
-- swap elements from both ends of the prefix, moving inward
x = t
y = t + t[0]
while x < y do
c = @x
@x = @y
x = x + 1
@y = c
y = y - 1
end
i = i + 1
until t[t[0]] == 0
--C.printf("flip %d\n",i);
return i
end
-- Rotates s[0 .. n] left by one position: every element moves down one slot
-- and the old s[0] is stored at s[n].
terra rotate(n : int)
var c = s[0]
for i = 0,n do
s[i] = s[i+1]
end
s[n] = c
--C.printf("rotate(%d) %d\n",n,c);
end
-- Main enumeration loop for n elements. Each iteration rotates s and advances
-- the per-position counters in c; for every arrangement visited it updates the
-- globals maxflips and checksum.
terra tk(n : int)
var i = 0
var f : int
var c : elem[16]
-- this loop variable i is local to the for statement
for i = 0,16 do
c[i] = 0
end
while i < n do
rotate(i)
if c[i] >= i then
-- counter exhausted at this position: reset it, move to the next
-- position and skip the flip accounting
c[i] = 0
i = i + 1
goto continue
end
c[i] = c[i] + 1
i = 1
odd = not odd
if s[0] ~= 0 then
-- flip() is only called when needed; otherwise the count is taken as 1
if s[s[0]] ~= 0 then
f = flip()
else
f = 1
end
if f > maxflips then
maxflips = f
end
--C.printf("f = %d\n",f)
-- the flip count is subtracted or added depending on `odd`
if odd then
checksum = checksum - f
else
checksum = checksum + f
end
end
::continue::
end
end
-- Runs the benchmark for N elements: resets the global state, validates N,
-- seeds s with 0..N-1, runs tk and prints the checksum and maximum flip count.
-- Returns the checksum. Exits the process with status 1 when N is out of range.
terra doit(N : int)
    maxflips = 0
    odd = false
    checksum = 0
    max_n = N
    -- The accepted range is 3..15 (s and t are declared with 16 elements);
    -- the message now states the same bound that the check enforces.
    if max_n < 3 or max_n > 15 then
        C.printf("range: must be 3 <= n <= 15\n")
        C.exit(1)
    end
    for i = 0,max_n do
        s[i] = i
    end
    tk(max_n)
    C.printf("%d\nPfannkuchen(%d) = %d\n", checksum, max_n, maxflips)
    return checksum
end
-- Entry point for the saved object file: expects the element count as the
-- first command-line argument, prints a usage line and exits with status 1 if
-- it is missing, otherwise runs doit on atoi of that argument.
terra main(argc : int, v : &&int8)
if argc < 2 then
C.printf("usage: %s number\n", v[0])
C.exit(1);
end
doit(C.atoi(v[1]))
return 0
end
local test = require("test")
-- compile ahead of the timed call
doit:compile()
-- time one run for N = 10 and check its checksum
print(test.time(function()
test.eq(doit(10),73196)
end))
terralib.saveobj("benchmark_fannkuchredux", { main = main } )

View File

@@ -1,60 +0,0 @@
local C = terralib.includec("stdio.h")
-- Translates the Brainfuck program in `code` into a terra function that runs
-- it on a zero-initialised tape of N ints.
local function compile(code,N)
    -- Produces the list of terra statements for the program, one per program
    -- character. `data` is the tape and `ptr` the current cell index.
    local function body(data,ptr)
        local program = terralib.newlist()
        -- label pairs of the loops that are currently open
        local open_loops = {}
        for pos = 1,#code do
            local op = code:sub(pos,pos)
            local emitted
            if op == ">" then
                emitted = quote ptr = ptr + 1 end
            elseif op == "<" then
                emitted = quote ptr = ptr - 1 end
            elseif op == "+" then
                emitted = quote data[ptr] = data[ptr] + 1 end
            elseif op == "-" then
                emitted = quote data[ptr] = data[ptr] - 1 end
            elseif op == "." then
                emitted = quote C.putchar(data[ptr]) end
            elseif op == "," then
                emitted = quote data[ptr] = C.getchar() end
            elseif op == "[" then
                -- loop head: test the cell and jump past the matching "]"
                local labels = { before = symbol(), after = symbol() }
                open_loops[#open_loops + 1] = labels
                emitted = quote
                    ::[labels.before]::
                    if data[ptr] == 0 then
                        goto [labels.after]
                    end
                end
            elseif op == "]" then
                -- loop tail: jump back to the head of the innermost open loop
                local labels = table.remove(open_loops)
                assert(labels)
                emitted = quote
                    goto [labels.before]
                    :: [labels.after] ::
                end
            else
                error("unknown character "..op)
            end
            program:insert(emitted)
        end
        return program
    end
    return terra()
        var data : int[N]
        for i = 0, N do
            data[i] = 0
        end
        var ptr = 0;
        [ body(data,ptr) ]
    end
end
-- Sample program, compiled for a 256-cell tape and run immediately.
local helloworld = "++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>."
local fn = compile(helloworld,256)
fn()

View File

@@ -1,7 +0,0 @@
-- A statement quote containing only an empty `while false` loop is used as the
-- operand of a terra return statement; the function must still compile and run.
local a = quote
while false do end
end
terra foo()
return a
end
foo()

View File

@@ -1,23 +0,0 @@
-- Tests of `escape ... end` blocks with `emit`.
terra foo()
var c = 3
escape
local a = 1
-- emits the increment ten times, so c ends at 3 + 10*1
for i = 1,10 do
emit quote c = c + a end
end
end
return c
end
assert(foo() == 13)
-- escape used in expression position
terra foo2()
return escape emit(1) end
end
assert(1 == foo2())
-- a quote passed as the sole argument of a method call without parentheses
a = terralib.newlist()
a:insert quote
foo()
end

View File

@@ -1,57 +0,0 @@
-- Builds a nested Lua table of symbols. Each extra argument adds one level of
-- nesting with that many entries; leaves are created by symbol(), named after
-- `name` followed by the index taken at every level.
-- With no size argument a single symbol(name) is returned.
function symmat(name,I,...)
    if I then
        local level = {}
        for idx = 1,I do
            level[idx] = symmat(name..tostring(idx),...)
        end
        return level
    end
    return symbol(name)
end
-- Smaller of two ints, expressed with terralib.select instead of a branch statement.
terra min(a : int, b : int)
return terralib.select(a < b, a, b)
end
-- Generates a nest of terra for-loops over several dimensions (`bounds`),
-- blocked at several granularities (`sizes`). For every entry of `sizes` one
-- loop per dimension is emitted; at the innermost position bodyfn is called
-- with the index symbols of the last level and its result becomes the body.
function blockedloop(bounds,sizes,bodyfn)
    local indexes = symmat("i",#sizes,#bounds)
    --local makeloop --bug local function doesn't add to set of live variables...
    local function makeloop(s,b)
        if s > #sizes then
            -- all levels emitted: splice in the user's body
            return bodyfn(unpack(indexes[#sizes]))
        end
        if b > #bounds then
            -- all dimensions of this level emitted: go to the next level
            return makeloop(s + 1, 1)
        end
        local limit = bounds[b]
        local stride = sizes[s]
        -- the outermost level spans the whole range; inner levels start at the
        -- enclosing level's index and cover one block of that level
        local first, extent = 0, limit
        if s ~= 1 then
            first, extent = indexes[s-1][b], sizes[s-1]
        end
        return quote
            for [indexes[s][b]] = first,min(first+extent,limit),stride do
                [ makeloop(s,b+1) ]
            end
        end
    end
    return makeloop(1,1)
end
IO = terralib.includec("stdio.h")
-- Prints every (m,n) pair visited by a loop nest over a 30 x 40 range generated
-- with block sizes {10,1}; the generated code is pretty-printed before running.
terra main()
var M,N = 30,40;
[blockedloop({M,N}, {10,1}, function(m,n)
return quote
IO.printf("%d %d\n",m,n)
end
end)]
end
main:printpretty()
main()

View File

@@ -1,68 +0,0 @@
C = terralib.includec("stdio.h")
-- Smaller of two ints, written with an explicit if/else.
terra min(a: int, b: int) : int
if a < b then return a
else return b end
end
std = terralib.includec("stdlib.h")
-- Returns a struct type for a square image of PixelType, stored row by row in
-- one malloc'd buffer (`data`) with side length `N`.
function Image(PixelType)
    local struct ImageImpl {
        data : &PixelType,
        N : int
    }
    -- Allocates an N x N buffer; the pixel values are left uninitialised.
    terra ImageImpl:init(N: int): {} --returns nothing
        self.data = [&PixelType](
            std.malloc(N*N*sizeof(PixelType)))
        self.N = N
    end
    terra ImageImpl:get(x: int, y: int) : PixelType
        return self.data[x*self.N + y]
    end
    terra ImageImpl:set(x: int, y: int, v : PixelType)
        self.data[x*self.N + y] = v
    end
    -- Stand-in for saving: prints the top-left 2x2 pixels. The name argument
    -- `i` is not used.
    terra ImageImpl:save(i : rawstring)
        for x = 0, 2 do for y = 0, 2 do
            C.printf("%d %d %f\n", x,y,self:get(x,y))
        end end
    end
    -- Stand-in for loading: allocates a 16x16 image and fills its top-left 4x4
    -- corner with a fixed pattern. The name argument `i` is not used.
    terra ImageImpl:load(i : rawstring) self:init(16)
        for x = 0, 4 do for y = 0, 4 do
            self:set(x,y,(x*4+y)%3)
            --C.printf("%f\n",self:get(x,y))
        end end
    end
    -- Releases the buffer allocated by init. This used to be an empty stub, so
    -- every image leaked its buffer even though callers invoke free(). The
    -- pointer is cleared afterwards so a stale address is not kept around.
    terra ImageImpl:free()
        std.free(self.data)
        self.data = nil
    end
    return ImageImpl
end
GreyscaleImage = Image(float)
-- Writes into `out` the sum of each interior pixel's four direct neighbours
-- minus four times the pixel itself. `out` is (re)initialised here with side
-- length img.N - 2.
terra laplace(img: &GreyscaleImage,
out: &GreyscaleImage) : {}
--shrink result, do not calculate boundaries
var newN = img.N - 2
out:init(newN)
for i = 0,newN do
for j = 0,newN do
-- (i,j) in `out` corresponds to (i+1,j+1) in `img`
var v = img:get(i+0,j+1) + img:get(i+2,j+1)
+ img:get(i+1,j+2) + img:get(i+1,j+0)
- 4 * img:get(i+1,j+1)
out:set(i,j,v)
end
end
end
-- Loads an image, applies laplace, saves the result and frees both images.
-- The two names are simply forwarded to load and save.
terra runlaplace(input: rawstring,
output: rawstring) : {}
var i: GreyscaleImage, o : GreyscaleImage
i:load(input)
laplace(&i,&o)
o:save(output)
i:free(); o:free()
end
-- run once, then also emit the function as an object file
runlaplace("myinput","myoutput")
terralib.saveobj("runlaplace.o",
{runlaplace = runlaplace})

View File

@@ -1,81 +0,0 @@
-- Smaller of two ints, via terralib.select.
terra min(a : int, b : int)
return terralib.select(a < b, a, b)
end
-- Generates a two-dimensional loop nest over [0,N) x [0,N), blocked once per
-- entry of `blocksizes`. Each level iterates i and j with that level's block
-- size as the step; at the innermost point bodyfn(i,j) supplies the body.
function blockedloop(N,blocksizes,bodyfn)
-- n: current level; ii,jj: start of the enclosing block; bb: its extent
local function generatelevel(n,ii,jj,bb)
if n > #blocksizes then
return bodyfn(ii,jj)
end
local blocksize = blocksizes[n]
return quote
-- min(...) clamps the block's end to N
for i = ii,min(ii+bb,N),blocksize do
for j = jj,min(jj+bb,N),blocksize do
[ generatelevel(n+1,i,j,blocksize) ]
end
end
end
end
return generatelevel(1,0,0,N)
end
IO = terralib.includec("stdio.h")
stdlib = terralib.includec("stdlib.h")
-- Numbers the cells of an 8x8 array in the order the blocked loop nest (block
-- sizes 4, 2, 1) visits them, then prints the array row by row.
terra main()
var a : int[8][8]
var c = 0
var N = 8
[blockedloop(N, {4,2,1}, function(i,j)
return quote
--IO.printf("%d %d\n",i,j)
a[i][j] = c
c = c + 1
end
end)]
for i = 0,N do
for j = 0,N do
IO.printf("%d\t",a[i][j])
end
IO.printf("\n")
end
end
main()
-- Returns a struct type for a square image whose pixels have type Spectrum,
-- stored row by row in one malloc'd buffer (`data`) with side length `N`.
function Image(Spectrum)
    local struct ImageImpl {
        data : &Spectrum,
        N : int
    }
    -- Allocates an N x N buffer; the pixel values are left uninitialised.
    terra ImageImpl:init(N : int)
        -- The cast must target &Spectrum, the declared type of `data`. It was
        -- hard-coded to &float, which only matched when Spectrum was float.
        self.data = [&Spectrum](stdlib.malloc(N*N*sizeof(Spectrum)))
        self.N = N
    end
    -- img:pixel(x,y) expands to an access of the pixel at row x, column y, so
    -- it can be both read and assigned.
    ImageImpl.methods.pixel = macro(function(self,x,y)
        return `self.data[x*self.N + y]
    end)
    return ImageImpl
end
GreyScaleImage = Image(float)
-- Same neighbour-sum-minus-4x-centre computation per output pixel, but the
-- loop nest is generated by blockedloop with block sizes {32,1}. `output` is
-- initialised here with side length input.N - 2.
terra laplace(input : &GreyScaleImage, output : &GreyScaleImage)
var newN = input.N - 2 --shrink result since we do not calculate boundaries
output:init(newN);
[blockedloop(newN,{32,1},function(i,j)
return quote
output:pixel(i,j) =
input:pixel(i+0,j+1)
+ input:pixel(i+2,j+1)
+ input:pixel(i+1,j+2)
+ input:pixel(i+1,j+0)
- 4 * input:pixel(i+1,j+1)
end
end)]
end
-- compiled and pretty-printed only; not called in this script
laplace:compile()
laplace:printpretty()

View File

@@ -1,85 +0,0 @@
C = terralib.includec("stdio.h")
-- Generates a blocked two-dimensional loop nest over [0,N) x [0,N).
-- One pair of i/j loops is emitted per entry of `blocksizes`, stepping by that
-- entry; once every level has been emitted, bodyfn(i,j) provides the body.
function blockedloop(N,blocksizes,bodyfn)
    -- n is the level being generated, (ii,jj) the origin of the enclosing
    -- block and bb that block's extent.
    local function generatelevel(n,ii,jj,bb)
        if n <= #blocksizes then
            local stride = blocksizes[n]
            return quote
                for i = ii,min(ii+bb,N),stride do
                    for j = jj,min(jj+bb,N),stride do
                        [ generatelevel(n+1,i,j,stride) ]
                    end
                end
            end
        end
        return bodyfn(ii,jj)
    end
    return generatelevel(1,0,0,N)
end
-- Smaller of two ints; referenced by name inside the quotes that blockedloop builds.
terra min(a: int, b: int) : int
if a < b then return a
else return b end
end
std = terralib.includec("stdlib.h")
-- Returns a struct type for a square image of PixelType, stored row by row in
-- one malloc'd buffer (`data`) with side length `N`.
function Image(PixelType)
    local struct ImageImpl {
        data : &PixelType,
        N : int
    }
    -- Allocates an N x N buffer; the pixel values are left uninitialised.
    terra ImageImpl:init(N: int): {} --returns nothing
        self.data =
            [&PixelType](std.malloc(N*N*sizeof(PixelType)))
        self.N = N
    end
    terra ImageImpl:get(x: int, y: int) : PixelType
        return self.data[x*self.N + y]
    end
    terra ImageImpl:set(x: int, y: int, v : PixelType)
        self.data[x*self.N + y] = v
    end
    -- Stand-in for saving: prints the top-left 8x8 pixels. The name argument
    -- `i` is not used.
    terra ImageImpl:save(i : rawstring)
        for x = 0, 8 do for y = 0, 8 do
            C.printf("%d %d %f\n", x,y,self:get(x,y))
        end end
    end
    -- Stand-in for loading: allocates a 16x16 image and fills its top-left
    -- 10x10 corner with a fixed pattern. The name argument `i` is not used.
    terra ImageImpl:load(i : rawstring) self:init(16)
        for x = 0, 10 do for y = 0, 10 do
            self:set(x,y,(x*4+y)%3)
            --C.printf("%f\n",self:get(x,y))
        end end
    end
    -- Releases the buffer allocated by init. This used to be an empty stub, so
    -- every image leaked its buffer even though callers invoke free(). The
    -- pointer is cleared afterwards so a stale address is not kept around.
    terra ImageImpl:free()
        std.free(self.data)
        self.data = nil
    end
    return ImageImpl
end
GreyscaleImage = Image(float)
-- Sum of the four direct neighbours minus four times the centre, for every
-- interior pixel; the loop nest comes from blockedloop with block sizes
-- {4,2,1}. `out` is initialised here with side length img.N - 2.
terra laplace(img: &GreyscaleImage,
out: &GreyscaleImage) : {}
--shrink result, do not calculate boundaries
var newN = img.N - 2
out:init(newN);
[blockedloop(newN,{4,2,1}, function(i,j)
return quote
var v = img:get(i+0,j+1) + img:get(i+2,j+1)
+ img:get(i+1,j+2) + img:get(i+1,j+0)
- 4 * img:get(i+1,j+1)
out:set(i,j,v)
end
end)]
end
-- Loads an image, applies laplace, saves the result and frees both images.
-- The two names are simply forwarded to load and save.
terra runlaplace(input: rawstring,
output: rawstring) : {}
var i: GreyscaleImage, o : GreyscaleImage
i:load(input)
laplace(&i,&o)
o:save(output)
i:free(); o:free()
end
-- run once, then also emit the function as an object file
runlaplace("myinput","myoutput")
terralib.saveobj("runlaplace.o",
{runlaplace = runlaplace})

View File

@@ -1,23 +0,0 @@
-- Lua and terra functions calling each other recursively, sharing one Count
-- through a pointer.
local io = terralib.includec("stdio.h")
struct Count { value : int }
-- Lua side: prints, increments the counter and calls back into terra.
-- terrafn is looked up as a global when luafn runs, so it may be defined later.
function luafn(a)
print("lua:",a.value)
a.value = a.value + 1
terrafn(a)
end
-- Terra side: prints and hands control to luafn until the counter reaches 100.
terra terrafn(a : &Count)
io.printf("terra: %d\n",a.value)
if a.value < 100 then
luafn(a)
end
return a.value
end
terra begin()
var c = Count {0}
return terrafn(&c)
end
local test = require("test")
test.eq(begin(),100)

View File

@@ -1,20 +0,0 @@
-- Platform guard: the body only runs when `uname` prints exactly "Darwin\n".
local f = assert(io.popen("uname", 'r'))
local s = assert(f:read('*a'))
f:close()
if s~="Darwin\n" then
print("Warning, not running test b/c this isn't a mac")
else
local OC = require("lib/objc")
local OCR = terralib.includec("objc/runtime.h")
-- Passes the `data` field of OC.NSObject, cast to &objc_class, to
-- objc_allocateClassPair. Compiled only; never called.
terra main()
var nsobject = OC.NSObject
OCR.objc_allocateClassPair([&OCR.objc_class](nsobject.data),nil,0)
end
main:compile()
end

View File

@@ -1,20 +0,0 @@
ffi = require("ffi")
cstdio = terralib.includec("stdio.h")
-- pointer-to-4-float-vector type, used below as a cast
local vec4 = &vector(float,4)
-- not referenced by the lines below
local align = terralib.aligned
-- Copies one 4-float vector from w to out, passing {align = 4} to both the
-- load and the store.
terra lol( w : &float, out : &float)
var a = terralib.attrload(vec4(w),{align = 4})
terralib.attrstore(vec4(out), a, {align = 4})
end
-- 32-float input filled with 0..31 and a 32-float output buffer
dat = ffi.new("float[?]",32)
for i=0,31 do dat[i]=i end
datO = ffi.new("float[?]",32)
lol:compile()
lol:disas()
lol(dat, datO)

View File

@@ -1,15 +0,0 @@
-- A struct type created through the API (newstruct) receives a field by
-- inserting into its entries list; that field's type is a struct declared
-- with the normal syntax. foo is compiled only.
local S = terralib.types.newstruct("mystruct")
struct A {
v : int
}
S.entries:insert( { type = A, field = "what" } )
terra foo()
var v : S
return v.what.v
end
foo:compile()

View File

@@ -1,14 +0,0 @@
c = terralib.includecstring [[
#include<stdio.h>
#include<string.h>
]]
-- A Lua-allocated struct with fixed-size int8 array fields is referenced from
-- terra code; `slug + 0` is used wherever a pointer to the array's first
-- element is needed.
struct exception { slug : int8[60]; code : int; msg : int8[960]; }
EXC_INFO = terralib.new(exception)
-- copies 7 bytes ('foobar' plus terminator) into slug and prints it
terra bar() c.memcpy(EXC_INFO.slug + 0, 'foobar', 7); c.printf('%s\n', EXC_INFO.slug + 0); end
bar()
-- writes "A" (65, 0) element by element and prints it
terra zoo() EXC_INFO.slug[0] = 65; EXC_INFO.slug[1] = 0; c.printf('%s\n', EXC_INFO.slug + 0); end
zoo()
terra zoo2() EXC_INFO.slug[0] = 65; EXC_INFO.slug[1] = 0; return EXC_INFO.slug[0] end
assert(zoo2() == 65)

View File

@@ -1,33 +0,0 @@
-- Compiles a postfix (reverse Polish) instruction list into a terra function.
-- `inst` is a Lua array whose entries are numbers (pushed as constants) or one
-- of the operator strings "+", "-", "*", "/" (applied to the two most recently
-- pushed expressions). Returns a terra function of no arguments that returns
-- the value of the final expression.
-- Raises a Lua error for an unknown operator or when an operator lacks
-- operands; previously an unknown operator silently discarded two operands.
function makecalcfn(inst)
    local stk = {}
    for i,v in ipairs(inst) do
        if type(v) == "number" then
            table.insert(stk,`v)
        else
            -- right operand is on top of the stack
            local b = table.remove(stk)
            local a = table.remove(stk)
            if a == nil or b == nil then
                error("operator "..tostring(v).." at position "..i.." needs two operands")
            end
            if v == "+" then
                table.insert(stk,`a + b)
            elseif v == "-" then
                table.insert(stk,`a - b)
            elseif v == "*" then
                table.insert(stk,`a * b)
            elseif v == "/" then
                table.insert(stk,`a / b)
            else
                error("unknown operator "..tostring(v))
            end
        end
    end
    local result = table.remove(stk)
    local terra wrapper()
        return result
    end
    return wrapper
end
-- 5 4 * 5 - 3 +  ==  (5*4 - 5) + 3  ==  18
local calcfn = makecalcfn({5,4,"*",5,"-",3,"+"})
local test = require("test")
test.eq(calcfn(),18)

View File

@@ -1,53 +0,0 @@
-- Tests of calls between terra functions, including functions returning
-- several values and how those values are consumed.
terra bar(a : int, b : int) : int
return a + b
end
terra foo(a : int,b : int) : int
return bar(a,b) + 1
end
-- baz and baz2 are joined with `and`, so baz can call baz2 although baz2 is
-- written after it. baz2's two results fill c and d.
terra baz(a : int, b : int) : int
var f,c,d = 4,baz2(a,b)
return f + c + d
end and
terra baz2(a : int, b : int) : {int, int}
return a + 1, b + 2
end
terra two(a : int, b : int) : int
return a + b
end
terra baz3() : int
var a,b = baz2(1,2)
return two(a,b)
end
-- only the first result of baz2 is used, selected with ._0
terra baz4() : int
return two(5,(baz2(1,2))._0)
end
terra baz5()
var a,b = baz2(0,0)
return 1,2,a,b
end
terra baz6()
var a,b,c,d = baz5()
var e = (baz5()._0)
return a + b + c + d + e
end
-- both branches return two values
terra baz7(a : int)
if a < 3 then
return 1,(baz5())._0
else
return 100,2
end
end
local test = require("test")
test.eq(foo(2,3),6)
test.eq(baz(1,2),10)
test.eq(baz3(),6)
test.eq(baz4(),7)
test.eq(baz6(),7)
test.meq({100,2},baz7(10))

View File

@@ -1,9 +0,0 @@
-- Defines and runs a fresh local terra function on each of ten iterations.
-- The function calls `print` (presumably the Lua global) with two arguments
-- and then with one.
for i = 1,10 do
local terra doprint()
print(1,2)
print(3)
end
doprint()
end

View File

@@ -1,45 +0,0 @@
-- B contains an A by value and A holds a pointer back to B; the two
-- declarations are joined with `and`.
struct B {
a : A
} and
struct A {
b : &B
}
struct C {
i : int
}
-- anonymous struct type, plus pointer and function-pointer types built from it;
-- FP/FP2 and FI/FI2 are each constructed twice from identical expressions
local U = struct { c : C }
local UP = &U
local FP = UP -> int
local FP2 = UP -> int
local FI = int -> int
local FI2 = int -> int
terra anon()
var b : B
b.a.b = &b
return 4
end
terra anon2()
var u = U { C {3} }
var fp : FP, fi : FI
-- values declared with one of the twin types are assigned to the other
var fi2 : FI2 = fi
var fp2 : FP2 = fp
var up : UP = &u
return up.c.i
end
test = require("test")
test.eq(anon(),4)
test.eq(anon2(),3)

View File

@@ -1,41 +0,0 @@
-- C side of the test: a union with an anonymous struct member, a static
-- instance, a function that fills it and a function returning its address.
local C = terralib.includecstring [[
typedef union {
float v[2];
struct {
float x;
float y;
};
} float2;
static float2 a;
void doit() {
a.x = 4;
a.y = 5;
}
float2* what() { return &a; }
]]
-- print the layout terra derived for the union
C.float2:printpretty()
-- Macro used as an __entrymissing handler: when `name` is not a direct field
-- of `self`, search the fields whose key matches "_%d+" (the keys used for
-- anonymous members here) for a nested field called `name`, and expand to an
-- access through that member.
-- Raises a Lua error naming the field and the struct type if nothing matches.
-- The message previously referenced an undefined `T` and always printed "nil".
local anonstructgetter = macro(function(name,self)
    local selftype = self:gettype()
    for i,e in pairs(selftype:getfields()) do
        if e.key:match("_%d+") and e.type:getfield(name) then
            return `self.[e.key].[name]
        end
    end
    error("no field "..name.." in struct of type "..tostring(selftype))
end)
C.float2.metamethods.__entrymissing = anonstructgetter
-- Reads the union both through the array member v and through the x/y names
-- of the anonymous struct member. The parameter pa is not used; the value is
-- fetched again through C.what().
terra foo(pa : &C.float2)
var a = @C.what()
return a.v[0],a.v[1],a.x,a.y
end
-- doit() stores 4 and 5, so both views must report 4,5
C.doit()
local a = foo(C.what())
assert(4 == a._0)
assert(5 == a._1)
assert(4 == a._2)
assert(5 == a._3)

View File

@@ -1,11 +0,0 @@
-- Minimal test: two single-argument terra functions called from Lua.
terra bar(a : int) : int
return a + 1
end
terra foo(a : int) : int
return a
end
local test = require("test")
test.eq(foo(2),2)
test.eq(bar(2),3)

View File

@@ -1,13 +0,0 @@
-- A C function taking and returning _Bool is called from terra with terra bool
-- values, and the result is compared against Lua booleans.
C = terralib.includecstring [[
_Bool And(_Bool a, _Bool b) { return a && b; }
]]
terra foobar(a : bool, b : bool)
return C.And(a,b)
end
assert(foobar(false,true) == false)
assert(foobar(true,true) == true)

View File

@@ -1,488 +0,0 @@
-- Call/return tests. Each fN is a terra function with a particular mix of
-- argument and return types. The matching cN calls fN from terra twice: once
-- using the first result plus 1 and once unpacking all results.
-- Lua-side checks that are not placed next to a function appear at the end of
-- the script.
local test = require("test")
terra f1(a : int)
return a
end
terra c1()
return 1 + f1(4),f1(4)
end
-- int and float arguments
terra f2(a : int, b : float)
return a + b
end
terra c2()
return f2(3,4) + 1, f2(3,4)
end
-- two ints returned
terra f3()
return 3,4
end
terra c3()
var r = f3()
return f3()._0 + 1,unpackstruct(r)
end
-- two floats returned
terra f4() : {float,float}
return 3.25,4.25
end
terra c4()
var r = f4()
return f4()._0 + 1, unpackstruct(r)
end
-- four uint8 values returned
terra f5(a : int) : {uint8,uint8,uint8,uint8}
return 0,1,a,3
end
terra c5()
var r = f5(8)
return f5(8)._0 + 1, unpackstruct(r)
end
-- two doubles and an int returned
terra f6() : {double, double, int}
return 3.25, 4.25, 3
end
terra c6()
var r = f6()
return f6()._0 + 1, unpackstruct(r)
end
test.meq({4.25, 3.25,4.25,3},c6())
terra f7(a : int) : {double, double, int}
return 3.25, 4.25, a
end
terra c7()
var r = f7(4)
return f7(4)._0 + 1, unpackstruct(r)
end
test.meq({4.25,3.25,4.25,4},c7())
-- two doubles returned
terra f8() : {double, double}
return 3.25, 4.25
end
terra c8()
var r= f8()
return f8()._0 + 1, unpackstruct(r)
end
test.meq({4.25,3.25,4.25},c8())
-- Structs passed by value as arguments: two ints (S1), and an int followed by
-- two doubles (S2).
struct S1 {
a : int;
b : int;
}
terra f9(a : S1)
return a.a+1,a.b+2
end
terra c9()
var a = S1 { 4, 5}
var r = f9(a)
return f9(a)._0 + 1, unpackstruct(r)
end
test.meq({6,5,7},c9())
struct S2 {
a : int;
b : double;
c : double;
}
terra f10(a : S2)
return a.a, a.b, a.c
end
terra c10()
var s2 = S2 { 4,5,6 }
var r = f10(s2)
return f10(s2)._0 + 1, unpackstruct(r)
end
test.meq({5,4,5,6},c10())
-- a function with no return value, called from terra and (at the end of the
-- script) from Lua
C = terralib.includec("stdio.h")
terra f11(a : int)
C.printf("f11 %d\n",a)
end
terra c11()
f11(7)
end
c11()
-- Structs containing vector fields, in both field orders (S3, S4), and a
-- struct with a 4 x uint8 vector followed by an int (S5).
struct S3 {
a : vector(float,2);
b : double;
}
struct S4 {
b : double;
a : vector(float,2);
}
struct S5 {
a : vector(uint8,4);
b : int;
}
terra f12a(a : S3)
return a.a[0] + a.a[1], a.b
end
terra c12a()
var a = S3 { vector(2.25f, 3.25f), 4 }
var r = f12a(a)
return f12a(a)._0 + 1, unpackstruct(r)
end
test.meq({6.5,5.5,4},c12a())
terra f12b(a : S4)
return a.a[0] + a.a[1], a.b
end
terra c12b()
var a = S4 { 4, vector(2.25f, 3.25f) }
var r = f12b(a)
return f12b(a)._0 + 1, unpackstruct(r)
end
test.meq({6.5,5.5,4},c12b())
-- calls both variants and returns all four results
terra f12()
var a = S3 { vector(8.f,2.f), 3.0 }
var b = S4 { 3.0, vector(8.f,2.f) }
var c,d = f12a(a)
var e,f = f12b(b)
return c,d,e,f
end
terra f13a(a : S5)
return a.a[0] + a.a[1] + a.a[2] + a.a[3], a.b
end
-- the vector is built with vectorof(int8, ...) although the field is declared
-- vector(uint8,4)
terra f13()
var a = S5 { vectorof(int8, 1,2,3,4), 5 }
return f13a(a)
end
-- Three floats (S6), a struct nested inside a struct (S7a in S7), and structs
-- wrapping uint8 arrays of length 7 (S8) and 9 (S9).
struct S6 {
a : float;
aa : float;
b : float
}
struct S7a {
a : int;
b : int;
}
struct S7 {
a : int;
b : S7a;
c : int;
}
terra f14(a : S6)
return a.a,a.aa,a.b
end
terra c14()
var a = S6 { 4,2,3}
var r = f14(a)
return f14(a)._0 + 1, unpackstruct(r)
end
test.meq({5,4,2,3},c14())
terra f15(a : S7)
return a.a, a.b.a, a.b.b, a.c
end
terra c15()
var a = S7 {1, S7a { 2,3 }, 4}
var r = f15(a)
return f15(a)._0 + 1, unpackstruct(r)
end
test.meq({2,1,2,3,4}, c15())
struct S8 {
a : uint8[7];
}
-- returns the first and last array element
terra f16(a : S8)
return a.a[0],a.a[6]
end
terra c16()
var a = S8 { arrayof(uint8, 1,2,3,4,5,6,7) }
var r = f16(a)
return f16(a)._0 + 1, unpackstruct(r)
end
test.meq({2,1,7},c16())
struct S9 {
a : uint8[9];
}
-- returns the first and last array element
terra f17(a : S9)
return a.a[0],a.a[8]
end
terra c17()
var a = S9 { arrayof(uint8, 1,2,3,4,5,6,7,8,9) }
var r = f17(a)
return f17(a)._0 + 1, unpackstruct(r)
end
test.meq({2,1,9},c17())
-- A struct passed after a run of scalar arguments. The f18 variants differ in
-- the number of leading arguments (5 or 6 ints; 7 or 8 floats), in the struct
-- type (S10 or S11) and in whether some leading arguments are returned too.
struct S10 {
a : double;
b : int64
}
terra f18a(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a5 : int, a : S10)
return a.a, a.b
end
terra c18a()
var r = f18a(1,2,3,4,5,6,S10{7,8})
return f18a(1,2,3,4,5,6,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18a())
terra f18b(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a : S10)
return a.a, a.b
end
terra c18b()
var r = f18b(1,2,3,4,5,S10{7,8})
return f18b(1,2,3,4,5,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18b())
terra f18c(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a : S10)
return a.a, a.b, a0, a1, a2
end
terra c18c()
var r = f18c(1,2,3,4,5,S10 {7,8})
return f18c(1,2,3,4,5,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8,1,2,3},c18c())
struct S11 {
a : float;
b : int;
}
terra f18d(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a5 : int, a : S11)
return a.a, a.b
end
terra c18d()
var r = f18d(1,2,3,4,5,6,S11{7,8})
return f18d(1,2,3,4,5,6,S11{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18d())
terra f18e(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a : S11)
return a.a, a.b
end
terra c18e()
var r = f18e(1,2,3,4,5,S11{7,8})
return f18e(1,2,3,4,5,S11{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18e())
terra f18f(a0 : int, a1 : int, a2 : int, a3 : int, a4: int, a : S11)
return a.a, a.b, a0, a1, a2
end
terra c18f()
var r = f18f(1,2,3,4,5,S11{7,8})
return f18f(1,2,3,4,5,S11{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8,1,2,3},c18f())
-- float-argument variants
terra f18g(a0 : float, a1 : float, a2 : float, a3 : float, a4: float, a5 : float, a6 : float, a7 : float, a : S10)
return a.a, a.b
end
terra c18g()
var r = f18g(1,2,3,4,5,6,9,10,S10{7,8})
return f18g(1,2,3,4,5,6,9,10,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18g())
terra f18h(a0 : float, a1 : float, a2 : float, a3 : float, a4: float, a5 : float, a6 : float, a : S10)
return a.a, a.b
end
terra c18h()
var r = f18h(1,2,3,4,5,6,9,S10{7,8})
return f18h(1,2,3,4,5,6,9,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8},c18h())
terra f18i(a0 : float, a1 : float, a2 : float, a3 : float, a4: float, a5 : float, a6 : float, a : S10)
return a.a, a.b, a0, a1, a2
end
terra c18i()
var r = f18i(1,2,3,4,5,6,9,S10{7,8})
return f18i(1,2,3,4,5,6,9,S10{7,8})._0 + 1, unpackstruct(r)
end
test.meq({8,7,8,1,2,3},c18i())
-- Remaining cases: a float/int struct argument (S12), a struct followed by a
-- scalar argument, a function returning nothing, and functions returning a
-- struct value directly (f22..f24).
struct S12 {
a : float;
b : int;
}
terra f19(a : S12)
return a.a, a.b
end
terra c19()
var a = S12 { 3,5 }
var r = f19(a)
return f19(a)._0 + 1, unpackstruct(r)
end
test.meq({4,3,5},c19())
terra f20(a : S10, b : int)
return a.a,a.b,b
end
terra c20()
var r = f20(S10{1,2},3)
return f20(S10{1,2},3)._0 + 1, unpackstruct(r)
end
test.meq({2,1,2,3},c20())
-- bare return with no value
terra f21()
return
end
f21()
-- each cNN below returns one field of the struct together with the whole
-- struct; Lua splits that pair with terralib.unpackstruct
terra f22()
return S12 { 3, 4}
end
terra c22()
return f22().a, f22()
end
local s22_0, s22_1 = terralib.unpackstruct(c22())
test.eq(s22_0,3)
test.eq(s22_1.a,3)
test.eq(s22_1.b,4)
terra f23()
return S10 { 1, 2}
end
terra c23()
return f23().a, f23()
end
local s23_0, s23_1 = terralib.unpackstruct(c23())
test.eq(s23_0,1)
test.eq(s23_1.a,1)
test.eq(s23_1.b,2)
terra f24()
return S2 { 1,2,3}
end
terra c24()
return f24().a, f24()
end
local s24_0, s24_1 = terralib.unpackstruct(c24())
test.eq(s24_0,1)
test.eq(s24_1.a,1)
test.eq(s24_1.b,2)
test.eq(s24_1.c,3)
-- the same struct-returning functions called directly from Lua
local s22 = f22()
test.eq(s22.a,3)
test.eq(s22.b,4)
local s23 = f23()
test.eq(s23.a,1)
test.eq(s23.b,2)
local s24 = f24()
test.eq(s24.a,1)
test.eq(s24.b,2)
test.eq(s24.c,3)
-- Lua-side calls of the fN functions. Struct arguments are written as Lua
-- tables, nested where the struct has a struct or array field.
test.meq({1,2,3},f20({1,2},3))
test.eq(f1(3),3)
test.eq(f2(4,5),9)
test.meq({3,4},f3())
test.meq({3.25,4.25},f4())
test.meq({0,1,2,3},f5(2))
test.meq({3.25,4.25,3},f6())
test.meq({3.25,4.25,4},f7(4))
test.meq({3.25,4.25},f8())
test.meq({3,5},f9({2,3}))
test.meq({1,2.5,3.5},f10({1,2.5,3.5}))
f11(3)
test.meq({10,3,10,3},f12())
test.meq({10,5},f13())
test.meq({4,5,6},f14({4,5,6}))
test.meq({1,2,3,4},f15({1,{2,3},4}))
test.meq({1,7},f16({{1,2,3,4,5,6,7}}))
test.meq({1,9},f17({{1,2,3,4,5,6,7,8,9}}))
test.meq({7,8},f18a(1,2,3,4,5,6,{7,8}))
test.meq({7,8},f18b(1,2,3,4,5,{7,8}))
test.meq({7,8,1,2,3},f18c(1,2,3,4,5,{7,8}))
test.meq({7,8},f18d(1,2,3,4,5,6,{7,8}))
test.meq({7,8},f18e(1,2,3,4,5,{7,8}))
test.meq({7,8,1,2,3},f18f(1,2,3,4,5,{7,8}))
test.meq({9,10},f18g(1,2,3,4,5,6,7,8,{9,10}))
test.meq({9,10},f18h(1,2,3,4,5,6,7,{9,10}))
test.meq({9,10,1,2,3},f18i(1,2,3,4,5,6,7,{9,10}))
test.meq({4,5}, f19({4,5}))
-- checks for the cN wrappers that were not verified next to their definition
test.meq({5,4},c1())
test.meq({8,7},c2())
test.meq({4,3,4},c3())
test.meq({4.25,3.25,4.25},c4())
test.meq({1,0,1,8,3},c5())

View File

@@ -1,14 +0,0 @@
-- A deferred call that takes a struct by value, to a function explicitly
-- marked as not inlined, followed by a bare return.
local struct Type {
field: int
}
local terra foo(obj: Type) end
foo:setinlined(false)
local terra bar()
var obj: Type
defer foo(obj)
return
end
bar()

View File

@@ -1 +0,0 @@
-- includec called with a second argument, a list holding "-v"
-- (presumably extra compiler flags -- confirm against the includec documentation).
terralib.includec("stdio.h",{"-v"})

View File

@@ -1,235 +0,0 @@
-- Single-inheritance test for lib/javalike: B extends A, C extends B, and C
-- redefines both combine and times2.
IO = terralib.includec("stdio.h")
local Class = require("lib/javalike")
struct A {
a : int
}
terra A:times2() : int
return self.a*2
end
struct B {
b : int
}
Class.extends(B,A)
terra B:combine(a : int) : int
return self.b + self.a + a
end
struct C {
c : double
}
Class.extends(C,B)
terra C:combine(a : int) : int
return self.c + self.a + self.b + a
end
-- declared to return double, while A:times2 returns int
terra C:times2() : double
return self.a * 4
end
-- helpers taking base-class pointers; the tests pass derived objects to them
terra doubleAnA(a : &A)
return a:times2()
end
terra combineAB(b : &B)
return b:combine(3)
end
terra returnA(a : A)
return a
end
terra foobar1()
var c = C.alloc()
c.a,c.b,c.c = 1,2,3.5
return c:times2()
end
assert(foobar1() == 4)
terra foobar()
var a = A.alloc()
a.a = 1
var b = B.alloc()
b.a,b.b = 1,2
var c = C.alloc()
c.a,c.b,c.c = 1,2,3.5
-- expected 23 = 2 + 2 + 4 + 6 + 9, i.e. C's own times2/combine are the ones
-- invoked when c is passed as &A / &B
var r = doubleAnA(a) + doubleAnA(b) + doubleAnA(c) + combineAB(b) + combineAB(c)
a:free()
b:free()
c:free()
return r
end
assert(23 == foobar())
-- Interface test: D implements two interfaces and is passed to functions
-- that take pointers to each interface type.
Doubles = Class.interface { times2 = {} -> int }
Adds = Class.interface { add = int -> int }
struct D {
data : int
}
Class.implements(D,Doubles)
Class.implements(D,Adds)
terra D:times2() : int
return self.data * 2
end
terra D:add(a : int) : int
return self.data + a
end
terra aDoubles(a : &Doubles)
return a:times2()
end
terra aAdds(a : &Adds)
return a:add(3)
end
terra foobar2()
-- stack-allocated object: init() is called explicitly before use
var a : D
a:init()
a.data = 3
-- expected 12 = 3*2 + (3+3)
return aDoubles(&a) + aAdds(&a)
end
assert(12 == foobar2())
local IO = terralib.includec("stdio.h")
struct Animal {
data : int
}
terra Animal:speak() : {}
IO.printf("... %d\n",self.data)
end
struct Dog {
}
Class.extends(Dog,Animal)
terra Dog:speak() : {}
IO.printf("woof! %d\n",self.data)
end
struct Cat {
}
Class.extends(Cat,Animal)
terra Cat:speak() : {}
IO.printf("meow! %d\n",self.data)
end
terra dospeak(a : &Animal)
a:speak()
end
terra barnyard()
var c : Cat
var d : Dog
c:init()
d:init()
c.data,d.data = 0,1
dospeak(&c)
dospeak(&d)
end
barnyard()
local Add = Class.interface { add = int -> int }
local Sub = Class.interface { sub = int -> int }
local struct P {
data : int
}
Class.implements(P,Add)
local struct C {
data2 : int
}
Class.extends(C,P)
Class.implements(C,Sub)
terra P:add(b : int) : int
self.data = self.data + b
return self.data
end
terra C:sub(b : int) : int
return self.data2 - b
end
terra doadd(a : &Add)
return a:add(1)
end
terra dopstuff(p : &P)
return p:add(2) + doadd(p)
end
terra dosubstuff(s : &Sub)
return s:sub(1)
end
-- Drives the interface/inheritance mix: P implements Add, C extends P and
-- additionally implements Sub.
-- The caller asserts the result is 15. With data = 1 on both objects and the
-- operands evaluated left to right, dopstuff yields add(2) = 3 plus
-- doadd -> add(1) = 4, i.e. 7 for each object, and dosubstuff(&c) yields
-- data2 - 1 = 1, so 7 + 7 + 1 = 15.
terra dotests()
var p : P
p:init()
var c : C
c:init()
p.data = 1
c.data = 1
c.data2 = 2
-- &c is passed both as a &P (base class) and as a &Sub (interface).
return dopstuff(&p) + dopstuff(&c) + dosubstuff(&c)
end
assert(dotests() == 15)
-- Micro-benchmark body for the cost of calling P:add through a &P.
-- Prints the pointer, then performs ten add(1) calls per loop iteration while
-- i advances from 0 towards N in steps of 10 (a hand-unrolled loop), and
-- finally returns the same pointer it was given.
terra timeadd(a :&P, N : int)
IO.printf("%p\n",a)
-- Step of 10 matches the ten calls in the body.
for i = 0, N,10 do
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
a:add(1)
end
return a
end
local a = global(C)
terra doinit() : &P
a:init()
a.data = 0
return &a
end
local v = doinit()
timeadd:compile()
local b = terralib.currenttimeinseconds()
--timeadd(v,100000000)
local e = terralib.currenttimeinseconds()
print(e - b)
print(v.data)

View File

@@ -1,45 +0,0 @@
local Interface = require("lib/golike")
local I = Interface.create {
get = {} -> int;
set = int -> {};
}
struct A {
data : int
}
terra A:get()
return self.data
end
terra A:set(a : int)
self.data = a
end
struct B {
data : int
}
terra B:get()
return self.data + 1
end
terra B:set(a : int)
self.data = self.data + a
end
terra anInterface(a : I)
a:set(3)
return a:get()
end
terra foo()
var a = A { 0 }
var b = B { 2 }
return anInterface(&a) + anInterface(&b)
end
local test = require("test")
test.eq(foo(),9)

View File

@@ -1,36 +0,0 @@
C = terralib.includec("stdio.h")
local Class = require("lib/javalike")
local Prints = Class.interface { print = {} -> {} }
struct Leaf {
data : int
}
Class.implements(Leaf,Prints)
terra Leaf:print() : {}
C.printf("%d\n",self.data)
end
struct Node {
next : &Leaf
}
Class.extends(Node,Leaf)
terra Node:print() : {}
C.printf("%d\n",self.data)
self.next:print()
end
-- Links a Node (data = 2) to a Leaf (data = 1) and prints the Node through a
-- &Prints interface pointer. If the call dispatches to Node:print, it prints
-- the node's data and then calls print on the leaf it points to.
-- NOTE(review): a and b come from alloc() and are never released here;
-- confirm whether a matching free() call is wanted.
terra test()
var a,b = Leaf.alloc(), Node.alloc()
a.data,b.data,b.next = 1,2,a
-- Assigning the &Node to a &Prints exercises the class-to-interface cast.
var p : &Prints = b
p:print()
end
test()

View File

@@ -1,22 +0,0 @@
J = require("lib/javalike")
struct Shape {
foo : int
}
Drawable = J.interface { draw = {} -> {} }
struct Square {
length : int
}
J.extends(Square,Shape)
J.implements(Square,Drawable)
terra Square:draw() : {} end
terra bar()
var a : &Square = Square.alloc()
a:draw()
end
bar()

View File

@@ -1,54 +0,0 @@
IO = terralib.includec("stdio.h")
local Class = require("lib/javalike")
struct A {
a : int;
bb : &B
} and struct B {
b : int;
aa : &A
}
Class.extends(B,A)
terra A:times2() : int
return self.a*2
end
terra B:combine(a : int) : int
return self.b + self.a + a
end
struct C {
c : double
}
Class.extends(C,B)
terra C:combine(a : int) : int
return self.c + self.a + self.b + a
end
terra C:times2() : double
return self.a * 4
end
terra doubleAnA(a : &A)
return a:times2()
end
terra combineAB(b : &B)
return b:combine(3)
end
terra returnA(a : A)
return a
end
-- Allocates a C (which extends B, which extends A; A and B are declared
-- mutually recursive), fills the inherited fields and calls times2.
-- C overrides times2 to return self.a * 4, so the caller expects 4.
terra foobar1()
    var c = C.alloc()
    c.a,c.b,c.c = 1,2,3.5
    -- Keep the result in a local so the object can be released before the
    -- return; the original leaked the allocation.
    var r = c:times2()
    c:free()
    return r
end
assert(foobar1() == 4)

View File

@@ -1,66 +0,0 @@
IO = terralib.includec("stdio.h")
local Class = require("lib/javalikesimple")
struct A {
a : int
}
terra A:times2() : int
return self.a*2
end
struct B {
b : int
}
Class.extends(B,A)
terra B:combine(a : int) : int
return self.b + self.a + a
end
struct C {
c : double
}
Class.extends(C,B)
terra C:combine(a : int) : int
return self.c + self.a + self.b + a
end
terra C:times2() : double
return self.a * 4
end
terra doubleAnA(a : &A)
return a:times2()
end
terra combineAB(b : &B)
return b:combine(3)
end
terra foobar1()
var c : C
c:init()
c.a,c.b,c.c = 1,2,3.5
return c:times2()
end
assert(foobar1() == 4)
-- Same scenario as the heap-based javalike test, but with stack-allocated
-- objects that are set up through init() and passed by address.
-- The caller asserts the result is 25. Assuming C's overrides are the ones
-- dispatched, the terms are: b:times2() = 2, doubleAnA(&a) = 2,
-- doubleAnA(&b) = 2, doubleAnA(&c) = 4, combineAB(&b) = 2+1+3 = 6 and
-- combineAB(&c) = 3.5+1+2+3 = 9.5 -> 9 as int.
terra foobar()
var a : A, b : B, c : C
a:init(); b:init(); c:init()
a.a = 1
b.a,b.b = 1,2
c.a,c.b,c.c = 1,2,3.5
var r = b:times2() + doubleAnA(&a) + doubleAnA(&b) + doubleAnA(&c) + combineAB(&b) + combineAB(&c)
return r
end
assert(25 == foobar())

View File

@@ -1,137 +0,0 @@
-- reading and returning by reference produces far more compact assembly
-- in terra than reading and returning by value, even when a by-value function
-- is embedded in a by-reference wrapper. Also, optimization behavior varies
-- wildly. Here's a demonstration:
local float4 = tuple(float,float,float,float)
-- first, comparing assembly code size of stand-alone functions which all do
-- the same operation, in different styles. This is after aggressive
-- optimization by the LLVM backend
--------------------------------------------------------------------------------
-- read by value, return by value
-- 26 instructions, 120 bytes
local terra add4_val_val(a : float4, b : float4) : float4
return {
a._0+b._0,
a._1+b._1,
a._2+b._2,
a._3+b._3
}
end
-- read by value, return by reference
-- 24 instructions, 103 bytes
local terra add4_val_ref(c : &float4, a : float4, b: float4) : {}
c._0 = a._0 + b._0
c._1 = a._1 + b._1
c._2 = a._2 + b._2
c._3 = a._3 + b._3
end
-- read by reference, return by value
-- 14 instructions, 74 bytes
local terra add4_ref_val(a : &float4, b : &float4) : float4
return {
a._0+b._0,
a._1+b._1,
a._2+b._2,
a._3+b._3
}
end
-- read by reference, return by reference
-- 12 instructions, 57 bytes
local terra add4_ref_ref(c : &float4, a : &float4, b: &float4) : {}
c._0 = a._0 + b._0
c._1 = a._1 + b._1
c._2 = a._2 + b._2
c._3 = a._3 + b._3
end
-- read by reference, return by reference, BUT use temporary variables
-- 4 instructions, 12 bytes (!!!)
-- what happens here is that the tempvars are understood and treated
-- as a single SIMD register, and so only one addition is executed.
-- this is already reflected in the bytecode passed to LLVM, so I suppose
-- terra does this optimization.
local terra add4_ref_ref_tempvar(c : &float4, a : &float4, b: &float4) : {}
var x = a._0 + b._0
var y = a._1 + b._1
var z = a._2 + b._2
var w = a._3 + b._3
c._0 = x
c._1 = y
c._2 = z
c._3 = w
end
-- turn on always-inline for later
add4_val_val:setinlined(true)
add4_val_ref:setinlined(true)
add4_ref_val:setinlined(true)
add4_ref_ref:setinlined(true)
-- disassemble add4_val_val; uncomment the others to look at their LLVM bytecode & disassembly
add4_val_val:disas()
-- add4_val_ref:disas()
-- add4_ref_val:disas()
-- add4_ref_ref:disas()
-- add4_ref_ref_tempvar:disas()
-- Size check for add4_val_val; it only runs when terralib.lookupsymbol
-- exists and the host OS is not Windows.
if terralib.lookupsymbol and require("ffi").os ~= "Windows" then
-- Looks up the symbol information of add4_val_val and returns its `size`
-- field.
terra sizecheck()
var si : terralib.SymbolInfo
terralib.lookupsymbol(add4_val_val,&si)
return si.size
end
-- NOTE(review): the comment on add4_val_val quotes 120 bytes, yet this
-- asserts a size below 16; confirm what unit `size` is reported in and
-- whether the bound is still the intended one.
assert(sizecheck() < 16)
end
--------------------------------------------------------------------------------
-- up to this point, one could argue that functions are always inlined into
-- other functions, and that the instructions dissolve in the greater scheme
-- of things.
-- if that is true, let's attempt to convert one style into another and see
-- if the optimizations catch up.
-- read by value, return by value -> read by reference, return by reference
-- the clunky solution to the slim interface
-- 38 instructions (!), 193 bytes (!!), so it gets *worse*
local terra add4_val_val_to_ref_ref(c : &float4, a : &float4, b: &float4) : {}
@c = add4_val_val(@a, @b)
end
-- read by reference, return by reference -> read by value, return by value
-- the slim solution to the clunky interface
-- 13 instructions, 61 bytes with tempvar -- wow, that's better than
-- the original function with the actual code in it!
local terra add4_ref_ref_to_val_val(a : float4, b : float4) : float4
var c : float4
add4_ref_ref_tempvar(&c, &a, &b)
return c
end
-- so what happens if we do a conversion back to the by-reference interface?
-- 41 instructions, 194 bytes
local terra add4_ref_ref_to_val_val_to_ref_ref(c : &float4, a : &float4, b: &float4) : {}
@c = add4_ref_ref_to_val_val(@a, @b)
end
-- and nest it once more, back to the by-value interface
-- 47 instructions, 208 bytes
-- so once we pass structs by-value, we'll never recover.
local terra add4_ref_ref_to_val_val_to_ref_ref_to_val_val(a : float4, b : float4) : float4
var c : float4
add4_ref_ref_to_val_val_to_ref_ref(&c, &a, &b)
return c
end
-- uncomment to look at individual disassembly
-- add4_val_val_to_ref_ref:disas()
-- add4_ref_ref_to_val_val:disas()
-- add4_ref_ref_to_val_val_to_ref_ref:disas()
-- add4_ref_ref_to_val_val_to_ref_ref_to_val_val:disas()

View File

@@ -1,17 +0,0 @@
local c = terralib.includec("stdio.h")
-- Hygiene test macro: the quote declares its own local `a` (= 3) and returns
-- it together with the macro argument. The caller passes its own variable
-- that is also named `a` (= 4) and expects {3,4}, i.e. the quote's `a` must
-- not capture or shadow the argument expression.
iamclean = macro(function(arg)
return quote
var a = 3
return a,arg
end
end)
terra doit()
var a = 4
iamclean(a)
end
local a = doit()
local test = require("test")
test.meq({3,4}, a)

View File

@@ -1,25 +0,0 @@
struct Foo {
c : float;
}
Foo.displayname = "struct.Foo"
terra useFoo()
var a : Foo
a.c = 4.5
return a.c
end
assert(4.5 == useFoo())
C = terralib.includecstring [[
typedef struct { int a; int b; } Foo;
]]
terra stuff()
var a : Foo
var b : C.Foo
b.a = 1
b.b = 2
a.c = 4.5
return b.a + b.b + a.c
end
assert(7.5 == stuff())

View File

@@ -1,24 +0,0 @@
G,T = terralib.includecstring [[
typedef struct {
double c;
} A;
struct A {
int b;
};
]]
G2,T2 = terralib.includecstring [[
struct A;
typedef struct C A;
]]
assert(T2.A == T.A)
assert(G2.A ~= T2.A and G2.A ~= G.A)
terra foo()
var a : G.A
var b : T.A
a.c = 4.5
b.b = 4.5
return a.c + b.b
end
assert(8.5 == foo())

View File

@@ -1,32 +0,0 @@
C,T = terralib.includecstring [[
struct Foo {
int a;
};
typedef int Foo;
typedef struct Foo * Bar;
typedef Foo * Bar2;
Bar bar(struct Foo * a, Bar2 b) { return (Bar)0; }
]]
terra what()
var f = T.Foo { 3 }
var a : C.Bar = &f
return a.a
end
assert(3 == what())
C,T = terralib.includecstring [[
typedef struct { int a; } Foo;
typedef Foo * FooPtr;
int returna(FooPtr a) { return a->a; }
]]
terra what2()
var a : C.Foo = C.Foo { 3 }
var ap : C.FooPtr = &a
return C.returna(ap)
end
assert(3 == what2())

View File

@@ -1,16 +0,0 @@
-- Tests the callback form of gettype when it is requested from inside a macro
-- that is used by the very function being queried.
local a = 0
local foo = macro(function(arg)
-- Ask for bar's type with a callback; the callback calls bar and stores the
-- result in the Lua upvalue `a`.
bar:gettype(function()
a = bar()
end)
-- Runs synchronously while the macro executes; the final assertion below
-- requires the callback to overwrite this value afterwards.
a = 1
-- The macro expands to the constant 3, which becomes bar's return value.
return 3
end)
terra bar()
return foo()
end
assert(bar() == 3)
-- a == 3 (not 1) shows the gettype callback ran after `a = 1` was executed.
assert(a == 3)

View File

@@ -1,13 +0,0 @@
C = terralib.includecstring [[
struct A {
int a;
};
static struct A foo;
struct A * getfoo() {
foo.a = 4;
return &foo;
}
]]
assert(C.getfoo().a == 4)

View File

@@ -1,12 +0,0 @@
terra foo()
end
foo:compile()
terralib.dumpmodule()
terralib.includecstring [[
int foo() {
return 4;
}
]]

View File

@@ -1,35 +0,0 @@
local foo = terralib.constant(terralib.new(int[4],{1,2,3,4}))
struct A {
a : int;
b : float
}
local mystr = terralib.new(A,{3,4.5})
local const = constant(mystr)
terra bar()
return foo[3] + mystr.a
end
terra bar2()
return foo[1] + mystr.b
end
function wrapper(a)
return a + 1
end
local p1 = terralib.constant(int -> int, wrapper)
terra doit()
return p1(3)
end
local test = require("test")
test.eq(bar(),7)
test.eq(bar2(),6.5)
test.eq(doit(),4)

View File

@@ -1,21 +0,0 @@
local a = terralib.new(int,3)
local b = terralib.new(int8,4)
local c = terralib.new(int64,5)
local d = terralib.new(float,3.25)
local e = terralib.new(double,4.25)
f = global(4)
terra foo()
return &f
end
local pf = foo()
terra bar()
return a + b + c + d + e + @pf
end
local test = require("test")
test.eq(bar(),23.5)

View File

@@ -1,12 +0,0 @@
local b = 1
local dd = "d"
local c = symbol()
terra foo()
var a = { _0 = [b], [c] = 2, [dd] = 3, r = 4}
return a._0 + a.[c] + a.d + a.r
end
local test = require("test")
test.eq(foo(),10)

View File

@@ -1,47 +0,0 @@
--- Assert that calling `fn` raises an error whose message matches `match`.
-- @param match Lua pattern the error message has to match
-- @param fn    zero-argument function that is expected to raise
-- Raises "failed to fail." when `fn` returns normally and
-- "failed wrong: <message>" when it raises something that does not match.
-- Both are raised at level 2 so the blame lands on the caller of failit.
function failit(match,fn)
    local success,msg = pcall(fn)
    if success then
        error("failed to fail.",2)
    end
    -- Error values are not necessarily strings. Convert once so that neither
    -- string.match nor the concatenation below raises an unrelated
    -- "bad argument" error that would hide the real failure.
    msg = tostring(msg)
    if not string.match(msg,match) then
        error("failed wrong: "..msg,2)
    end
end
local test = require("test")
local erd = "Errors reported during"
local terra f1()
return test
end
failit(erd,function()
f1:compile()
end)
failit("referencing a function which failed to compile",function()
f1:compile()
end)
failit(erd,function()
local terra foo()
f1()
end
foo()
end)
local struct A {
a : int
}
A.metamethods.__getentries = function(self)
error("I AM BAD")
end
failit(erd,function()
A:complete()
end)
failit(erd,function()
local terra foo()
var a : A
end
foo()
end)

View File

@@ -1,209 +0,0 @@
--- Expect `fn` to fail with an error message matching the pattern `match`.
-- @param match Lua pattern tested against the error message
-- @param fn    function called with no arguments under pcall
-- If `fn` succeeds, raises "failed to fail."; if it fails with a message that
-- does not match, raises "failed wrong: <message>". Level 2 is used so the
-- reported position is the failit call site.
function failit(match,fn)
    local success,msg = pcall(fn)
    if success then
        error("failed to fail.",2)
    end
    -- pcall may hand back a non-string error value; stringify it so the
    -- pattern test and the diagnostic below cannot themselves raise.
    msg = tostring(msg)
    if not string.match(msg,match) then
        error("failed wrong: "..msg,2)
    end
end
local test = require("test")
local erd = "Errors reported during"
failit(erd,function()
local aglobal = 5
local terra foo()
return [ (function() aglobal = 4; return 3 end)() ]
end
foo()
end)
A = terralib.types.newstruct()
A.entries:insert{ field = "a", type = int[2] }
A.metamethods.__getentries = function() error("NOPE") end
failit(erd,function()
A:complete()
end)
local terra foo()
var a : int[2]
return 3
end
foo:compile()
local a = 0
foo:gettype(function()
a = a + 1
end)
assert(a == 1)
local terra errored
failit(erd,function()
terra errored()
return A
end
errored:compile()
end)
failit("referencing a function which failed to compile",function()
errored()
end)
local terra ol(a : int) return a end
terra ol(a : int, b : int) return a + b end
assert(ol(3) == 3)
assert(ol(3,4) == 7)
failit("bad argument #1",function()
ol("a")
end)
ol:printstats()
NSE = terralib.types.newstruct()
failit(erd,function()
NSE.entries:insert { field = "a", type = "b" }
NSE:complete()
end)
SICS = terralib.types.newstruct()
SICS.entries:insert { field = symbol(), type = int }
a = 1
SICS.metamethods.__staticinitialize = function() a = a + 1 end
print(terralib.new(SICS,{3}))
NSF = terralib.types.newstruct()
NSF.entries:insert { type = int , field = 3 }
failit(erd,function()
NSF:complete()
end)
SICS:complete()
assert(a == 2)
struct SF {
a : SF2
} and struct SF2 {
a : int
}
SF2.metamethods.__getentries = function(self) SF:complete() end
failit(erd,function()
SF:complete()
end)
failit("Attempting to get a property of a type that previously resulted in an error.",function()
SF:complete()
end)
failit(erd,function()
struct SF { b : int }
end)
struct C {
a : int
}
C.metamethods.__cast = function() return error("CAST ERROR") end
local terra casttest()
return int(C { 3 })
end
failit(erd,function()
casttest()
end)
local terra shiftcheck()
var r = 1 << vector(1,2,3,4)
var r2 = vector(1,2,3,4) << 1
return r[0],r[1],r[2],r[3],r2[0],r2[1],r2[2],r2[3]
end
test.meq({2,4,8,16,2,4,6,8},shiftcheck())
failit(erd,function()
local terra foo()
return terralib.select(3,4,5)
end
foo()
end)
failit(erd,function()
local terra foo()
return (4):foo()
end
foo()
end)
failit(erd,function()
local terra foo()
return (C {3}):foo()
end
foo()
end)
failit(erd,function()
local a = { a = 4}
local terra foo()
return a()
end
foo()
end)
local saveit
local foom = macro(function(arg) saveit = arg; arg:astype(); end)
failit(erd,function()
local terra foo()
return foom(4)
end
foo()
end)
failit(erd,function()
local terra foo()
return saveit
end
foo()
end)
failit(erd,function()
local struct A {
a : 3
}
end)
failit(erd,function()
local terra foo
local bar = macro(function() foo:compile() end)
terra foo()
return bar()
end
foo()
end)
struct ATF {
a : int
}
ATF.metamethods.__getentries = function(self)
local terra foo()
var a : ATF
return a.a
end
foo:compile()
end
failit(erd,function()
ATF:complete()
end)
struct FA {
a : &FA2
} and struct FA2 {
a : int
}
FA.metamethods.__staticinitialize = function() a = a + 1 end
FA2.metamethods.__staticinitialize = function(self)
FA:complete()
end
FA:complete()
assert(a == 3)
--[[
freezing asynchronous needs to be called
]]

View File

@@ -1,107 +0,0 @@
--- Run `fn` and require it to raise an error that matches `match`.
-- @param match Lua pattern for the expected error message
-- @param fn    zero-argument function under test
-- Raises "failed to fail." if no error occurred and
-- "failed wrong: <message>" if the error does not match; both use level 2 so
-- the caller's line is reported.
function failit(match,fn)
    local success,msg = pcall(fn)
    if success then
        error("failed to fail.",2)
    end
    -- Normalise the error value to a string: a table or nil error would
    -- otherwise make string.match / `..` fail with a misleading message.
    msg = tostring(msg)
    if not string.match(msg,match) then
        error("failed wrong: "..msg,2)
    end
end
local test = require("test")
local erd = "Errors reported during"
terra foo()
end
foo:compile()
failit("inlining",function() foo:setinlined(false) end)
terra bar
failit("attempting to call",function()
bar:compile() end)
failit("expected a name",function()
terralib.intrinsic("far","far")
end)
print((&int)[4])
struct A {
a : int[4];
b : &int;
union { c : int; d : int};
e : int -> int;
}
A:printpretty()
terra rv() return vector(1,2,3) end
terra varg(a : vector(int,3))
return a[0] + a[1] + a[2]
end
assert (6 == varg(rv()))
local something = nil
failit(erd,function()
local terra a([something])
end
end)
failit(erd,function()
local terra what()
var a = A { ee = 4 }
end
what:compile()
end)
A.metamethods.__getmethod = function(self,methodname)
return 1
end
failit(erd,function()
local terra what()
var a : A
A.something()
end
what:compile()
end)
C = terralib.includec("stdio.h")
--TODO: __sputc isn't consistent across architectures, so this is a bad test
--failit("cannot import",function()
--local a = C.__sputc
--end)
failit("not found",function()
local a = C.nothing
end)
terra up() return unpackstruct(3) end
assert(3 == up())
failit(erd,function()
local terra aloads(a : &int)
return terralib.attrload(a),terralib.attrload(nil,{3}),terralib.attrstore(a,nil)
end
aloads:compile()
end)
terra f() end
terra f(a:int) end
failit("overloaded", function()
terralib.saveobj("err.o", { f = f})
end)
terra nop() end
terra nilcall(a : tuple(), b : int)
return b,a
end
terra nilstuff()
var a = nop()
var c,b = nilcall(a,4)
return c
end
assert(4 == nilstuff())
nilcall:disas()

View File

@@ -1 +0,0 @@
a = &int

View File

@@ -1 +0,0 @@
a = global( struct { a : int } )

View File

@@ -1,20 +0,0 @@
local C = terralib.includecstring [[
struct teststruct {
int idata;
float fdata;
};
void makeitlive(struct teststruct * s) {}
]]
terra foo()
var a : C.teststruct
a.idata = 3
a.fdata = 3.5
return a.idata + a.fdata
end
assert(foo() == 6.5)

View File

@@ -1,8 +0,0 @@
C = terralib.includecstring [[
typedef struct { int x; int y; } Point;
Point mkpoint() { Point p; p.x = p.y = 3; return p; }
]]
assert(C.mkpoint().x == 3)

View File

@@ -1,56 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x -- terralib.intrinsic("llvm.nvvm.read.ptx.sreg.ntid.x",{} -> int)
struct OneFloat {
a : int;
}
fn = terra(result : &float, bar : int[5])
result[tid()] = bar[0] + bar[1] + bar[2] + bar[3] + bar[4]
end
fn:setinlined(false)
--our very simple cuda kernel
--more work needs to be done to expose the right CUDA intrinsics
--to do more complicated things
foo = terra(result : &float)
fn(result, array(tid(),tid()+1,tid()+2,tid()+3,tid()+4) )
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R = terralib.cudacompile({ bar = foo })
-- Host-side driver: launches the compiled kernel R.bar on N floats of device
-- memory, copies them back and returns their sum.
-- Changes from the original: the cudaMemcpy direction is spelled with the
-- CUDA enum instead of the literal 2, and both buffers are released (the
-- original leaked the host and the device allocation).
terra doit(N : int)
    var data : &float
    C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.bar(&launch,data)
    var results : &float = [&float](C.malloc(sizeof(float)*N))
    C.cudaMemcpy(results,data,sizeof(float)*N,C.cudaMemcpyDeviceToHost)
    var result = 0.f
    for i = 0,N do
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
local test = require("test")
-- Number of threads launched (and of floats summed on the host).
local N = 16
-- s(n) is the sum 0 + 1 + ... + (n-1).
function s(n) return (n - 1) * n / 2 end
-- ex(i) is the sum of the N consecutive integers i, i+1, ..., N+i-1.
function ex(i) return s(N+i) - s(i) end
-- Thread t stores t + (t+1) + (t+2) + (t+3) + (t+4) (the five array elements
-- built in the kernel), so summing over t = 0..N-1 gives one ex(k) per
-- offset k = 0..4.
local expected = ex(0) + ex(1) + ex(2) + ex(3) + ex(4)
test.eq(doit(N),expected)

View File

@@ -1,45 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x -- terralib.intrinsic("llvm.nvvm.read.ptx.sreg.ntid.x",{} -> int)
struct A {
a : int
b : int
c : int[2]
}
terra foo(result : &float,a : A, c : int, d : int[2])
var t = tid()
result[t] = t + a.a + a.b + c + a.c[0] + a.c[1] + d[0] + d[1]
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R = terralib.cudacompile({ bar = foo })
-- Host-side driver: launches R.bar with a struct, an int and an array passed
-- by value as kernel arguments, copies the N results back and returns their
-- sum.
-- Changes from the original: the copy direction uses the CUDA enum instead of
-- the literal 2, and the host and device buffers are released instead of
-- leaked.
terra doit(N : int)
    var data : &float
    C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.bar(&launch,data, A { 1,2, array(3,4) },5,array(6,7))
    var results : &float = [&float](C.malloc(sizeof(float)*N))
    C.cudaMemcpy(results,data,sizeof(float)*N,C.cudaMemcpyDeviceToHost)
    var result = 0.f
    for i = 0,N do
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
local test = require("test")
local N = 16
local expected = (N - 1)*N/2 + N*(1 + 2 + 3 + 4 + 5 + 6 + 7)
test.eq(doit(N),expected)

View File

@@ -1,39 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
foo = terra(result : &int)
var t = tid()
terralib.asm(terralib.types.unit,"red.global.max.u32 [$0], $1;","l,r",true,result,t)
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
sync = terralib.externfunction("cudaThreadSynchronize", {} -> int)
local R = terralib.cudacompile({ bar = foo },true)
-- Host-side driver for the inline-PTX reduction test: seeds one device int
-- with 0, launches N threads that each apply `red.global.max` with their
-- thread id, and returns the value read back (the caller expects N-1).
-- Changes from the original: copy directions use the CUDA enums instead of
-- the literals 1 and 2, and the device allocation is released.
terra doit(N : int)
    var data = 0
    var location : &int
    C.cudaMalloc([&&opaque](&location),sizeof(int))
    C.cudaMemcpy(location,&data,sizeof(int),C.cudaMemcpyHostToDevice)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.bar(&launch,location)
    -- Start from -1 so a copy that never happened cannot look like a result.
    var data2 = -1
    C.cudaMemcpy(&data2,location,sizeof(int),C.cudaMemcpyDeviceToHost)
    C.cudaFree(location)
    return data2
end
assert(doit(32) == 31)

View File

@@ -1,14 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local const = cudalib.constantmemory(float, 1)
local terra kernel(data: &float)
end
local M = terralib.cudacompile({
kernel = kernel,
const = const
})

View File

@@ -1,48 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x -- terralib.intrinsic("llvm.nvvm.read.ptx.sreg.ntid.x",{} -> int)
theone = global(0)
theconst = cudalib.constantmemory(int,1)
terra foo(result : &float)
result[tid()] = tid() + theone + theconst[0]
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R = terralib.cudacompile({ foo = foo, aone = theone, theconst = theconst })
-- Host-side driver for the global/constant-memory test: writes 1 into the
-- device global exposed as R.aone and 2 into the constant memory exposed as
-- R.theconst, launches R.foo on N floats, copies them back and returns
-- their sum.
-- Changes from the original: copy directions use the CUDA enums instead of
-- the literals 1 and 2, and the host and device buffers are released.
terra doit(N : int)
    var data : &float
    C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
    var one = 1
    var two = 2
    C.cudaMemcpy(R.aone,&one,sizeof(int),C.cudaMemcpyHostToDevice)
    C.cudaMemcpy(R.theconst,&two,sizeof(int),C.cudaMemcpyHostToDevice)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.foo(&launch,data)
    var results : &float = [&float](C.malloc(sizeof(float)*N))
    C.cudaMemcpy(results,data,sizeof(float)*N,C.cudaMemcpyDeviceToHost)
    var result = 0.f
    for i = 0,N do
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
local test = require("test")
local N = 16
local expected = (N - 1)*N/2 + 3*N
test.eq(doit(N),expected)

View File

@@ -1,30 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
C = terralib.includec("stdio.h")
vprintf = terralib.externfunction("cudart:vprintf", {&int8,&int8} -> int)
foo = terra(result : &float)
var t = tid()
vprintf("%d\n",[&int8](&t))
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
sync = terralib.externfunction("cudaThreadSynchronize", {} -> int)
local R = terralib.cudacompile({ bar = foo })
terra doit(N : int)
var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
R.bar(&launch,nil)
sync()
C.printf("and were done\n")
end
doit(3)

View File

@@ -1,65 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x
theone = global(0)
theconst = cudalib.constantmemory(int,1)
terra foo(result : &float)
result[tid()] = tid() + theone + theconst[0]
end
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R,L = terralib.cudacompile({ foo = foo, aone = theone, theconst = theconst },nil,nil,false)
-- Host-side driver used by the offline-compiled executable: initialises the
-- device global (R.aone = 1) and the constant memory (R.theconst = 2),
-- launches R.foo on N floats, copies them back and returns their sum.
-- Changes from the original: copy directions use the CUDA enums instead of
-- the literals 1 and 2, and the host and device buffers are released.
terra doit(N : int)
    var data : &float
    C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
    var one = 1
    var two = 2
    C.cudaMemcpy(R.aone,&one,sizeof(int),C.cudaMemcpyHostToDevice)
    C.cudaMemcpy(R.theconst,&two,sizeof(int),C.cudaMemcpyHostToDevice)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.foo(&launch,data)
    var results : &float = [&float](C.malloc(sizeof(float)*N))
    C.cudaMemcpy(results,data,sizeof(float)*N,C.cudaMemcpyDeviceToHost)
    var result = 0.f
    for i = 0,N do
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
terra main() : int
if L(nil,nil,nil,0) ~= 0 then
C.printf("WHAT\n")
end
var N = 16
var expected = (N - 1)*N/2 + 3*N
return terralib.select(doit(N) == expected,0,1)
end
-- Link `main` into a standalone executable against the CUDA libraries and run
-- it; the test passes when the executable exits with status 0.
local ffi = require 'ffi'
-- Library sub-directory of the CUDA installation, selected by host OS.
-- NOTE(review): for any ffi.os value other than OSX/Linux/Windows the lookup
-- yields nil and the concatenation on the next line raises.
local path = ({ OSX = "/lib", Linux = "/lib64", Windows = "\\lib\\x64" })[ffi.os]
path = terralib.cudahome..path
-- Windows passes the import libraries by full path; elsewhere the linker gets
-- a search path, an rpath and -l flags.
local args = ffi.os == "Windows" and {path.."\\cuda.lib", path.."\\cudart.lib"}
or {"-L"..path, "-Wl,-rpath,"..path, "-lcuda", "-lcudart"}
local name = ffi.os == "Windows" and ".\\cudaoffline.exe" or "./cudaoffline"
terralib.saveobj(name,{ main = main },args)
-- main returns 0 only when doit(N) equals the expected sum.
assert(os.execute(name) == 0)

View File

@@ -1,20 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
C = terralib.includec("cuda_runtime.h")
cudalib.linkruntime()
terra foo()
var stuff : &opaque
C.cudaMalloc(&stuff,sizeof(int))
return stuff
end
local a = foo()
terra blank() end
terralib.cudacompile { blank = blank }
assert(0 == C.cudaMemset(a,0,4))

View File

@@ -1,50 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
vprintf = terralib.externfunction("cudart:vprintf", {&int8,&int8} -> int)
-- Builds the argument buffer for the device-side vprintf call.
-- `args` is a Lua list of Terra quotes (the printf arguments after the format
-- string). The result is a quote that declares a fresh struct with one field
-- per argument, stores every argument into its field, and evaluates to the
-- struct's address cast to &int8.
local function createbuffer(args)
-- Anonymous struct; its entries are filled in below, one per argument.
local Buf = terralib.types.newstruct()
return quote
var buf : Buf
escape
for i,e in ipairs(args) do
local typ = e:gettype()
-- Field names are "_1", "_2", ... following the argument position.
local field = "_"..tonumber(i)
-- float arguments are stored as double (presumably mirroring C's vararg
-- promotion, which the "%f" format expects -- confirm).
typ = typ == float and double or typ
table.insert(Buf.entries,{field,typ})
emit quote
buf.[field] = e
end
end
end
in
[&int8](&buf)
end
end
printf = macro(function(fmt,...)
local buf = createbuffer({...})
return `vprintf(fmt,buf)
end)
foo = terra(result : &float)
var t = tid()
printf("a = %d, b = %f, c = %d\n",t,1.0 + t,t + 2)
end
sync = terralib.externfunction("cuStreamSynchronize", {&opaque} -> int)
annotations = { {"maxntidx",43}, {"minctasm",8}} -- example of annotating cuda kernel with launch bounds
local R = terralib.cudacompile({ bar = { kernel = foo, annotations = annotations }})
terra doit(N : int)
var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
R.bar(&launch,nil)
sync(nil)
end
doit(3)

View File

@@ -1,47 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
N = 1024
somedata = cudalib.sharedmemory(int,N)
terra bar(result : &int)
var t = tid()
somedata[t] = t
cudalib.nvvm_barrier0()
result[t] = somedata[N - 1 - t]
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R = terralib.cudacompile({ bar = bar },true)
-- Host-side driver for the shared-memory test: launches R.bar on N ints of
-- device memory, copies them back and returns their sum.
-- Changes from the original: the copy direction uses the CUDA enum instead of
-- the literal 2, and the host and device buffers are released.
terra doit(N : int)
    var data : &int
    C.cudaMalloc([&&opaque](&data),sizeof(int)*N)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.bar(&launch,data)
    var results : &int = [&int](C.malloc(sizeof(int)*N))
    C.cudaMemcpy(results,data,sizeof(int)*N,C.cudaMemcpyDeviceToHost)
    var result = 0
    for i = 0,N do
        --C.printf("result = %d\n",results[i])
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
local test = require("test")
local expected = (N - 1)*N/2
test.eq(doit(N),expected)

View File

@@ -1,47 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x -- terralib.intrinsic("llvm.nvvm.read.ptx.sreg.ntid.x",{} -> int)
fn = terra(result : &float)
var t = tid()
result[t] = t
end
fn:setinlined(false)
--our very simple cuda kernel
--more work needs to be done to expose the right CUDA intrinsics
--to do more complicated things
foo = terra(result : &float)
fn(result)
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
local C = terralib.includecstring [[
#include "cuda_runtime.h"
#include <stdlib.h>
#include <stdio.h>
]]
local R = terralib.cudacompile({ bar = foo })
-- Host-side driver: launches R.bar (a kernel that calls the non-inlined
-- helper fn) on N floats of device memory, copies them back and returns
-- their sum.
-- Changes from the original: the copy direction uses the CUDA enum instead of
-- the literal 2, and the host and device buffers are released.
terra doit(N : int)
    var data : &float
    C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.bar(&launch,data)
    var results : &float = [&float](C.malloc(sizeof(float)*N))
    C.cudaMemcpy(results,data,sizeof(float)*N,C.cudaMemcpyDeviceToHost)
    var result = 0.f
    for i = 0,N do
        result = result + results[i]
    end
    C.free(results)
    C.cudaFree(data)
    return result
end
local test = require("test")
local N = 16
local expected = (N - 1)*N/2
test.eq(doit(N),expected)

View File

@@ -1,59 +0,0 @@
if not terralib.cudacompile then
print("CUDA not enabled, not performing test...")
return
end
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
C = terralib.includecstring [[
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <string.h>
]]
vprintf = terralib.externfunction("cudart:vprintf", {&int8,&int8} -> int)
foo = terra(result : C.cudaTextureObject_t)
var t = tid()
var r = terralib.asm([tuple(float,float,float,float)],
"tex.1d.v4.f32.s32 {$0,$1,$2,$3}, [$4, {$5}];",
"=f,=f,=f,=f,l,r",false,result,t)
var rr : double = r._0
vprintf("%f\n",[&int8](&rr))
end
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
sync = terralib.externfunction("cudaThreadSynchronize", {} -> int)
local R = terralib.cudacompile({ foo = foo })
-- Host-side driver for the texture-object test: uploads N floats, wraps the
-- device buffer in a linear-memory texture object with 32-bit float
-- channels, launches R.foo with that texture and waits for the kernel.
-- Changes from the original: d_buffer is declared &float to match the float
-- data it holds (it was &double), and the texture object and the device
-- buffer are released once the kernel has finished.
terra doit(N : int)
    var d_buffer : &float
    C.cudaMalloc([&&opaque](&d_buffer),N*sizeof(float))
    -- The host array holds 11 values; only the first N are uploaded.
    var h_buffer = arrayof(float,0,1,2,3,4,5,6,7,8,9,10)
    C.cudaMemcpy(d_buffer,&h_buffer[0],sizeof(float)*N, C.cudaMemcpyHostToDevice)

    var resDesc : C.cudaResourceDesc
    C.memset(&resDesc,0,sizeof(C.cudaResourceDesc))
    resDesc.resType = C.cudaResourceTypeLinear;
    resDesc.res.linear.devPtr = d_buffer;
    resDesc.res.linear.desc.f = C.cudaChannelFormatKindFloat;
    resDesc.res.linear.desc.x = 32; -- bits per channel
    resDesc.res.linear.sizeInBytes = N*sizeof(float);

    var texDesc : C.cudaTextureDesc
    C.memset(&texDesc, 0, sizeof(C.cudaTextureDesc));
    texDesc.readMode = C.cudaReadModeElementType;

    var tex : C.cudaTextureObject_t
    C.cudaCreateTextureObject(&tex, &resDesc, &texDesc, nil);

    -- CUDAParams is {grid x,y,z, block x,y,z, shared bytes, stream}:
    -- a single block of N threads.
    var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
    R.foo(&launch,tex)
    -- Wait for the kernel before tearing down the resources it reads.
    sync()
    C.cudaDestroyTextureObject(tex)
    C.cudaFree(d_buffer)
    C.printf("and were done\n")
end
doit(10)

View File

@@ -1,34 +0,0 @@
local C = terralib.includecstring [[
union testunion {
int idata;
float fdata;
};
typedef
union {
int a;
float b;
} S;
void setUnion(union testunion * u, S * s){
u->idata = 3;
}
int getUnionInt(union testunion * u){
return u->idata;
}
]]
terra foo() : int
var u : C.testunion
C.setUnion(&u,nil)
var s : C.S
return C.getUnionInt(&u)
end
terralib.tree.printraw(C.S)
local test = require("test")
test.eq(foo(),3)

View File

@@ -1,35 +0,0 @@
-- C side: a union with an int and a float member plus one setter per member.
local C = terralib.includecstring [[
union testunion {
int idata;
float fdata;
};
void setUnionI(union testunion * u){
u->idata = 3;
}
void setUnionF(union testunion * u){
u->fdata = 4.f;
}
]]

-- Writes each member through its C setter, reads it back from Terra and
-- returns the sum of the two values read.
terra foo() : int
    var storage : C.testunion
    C.setUnionI(&storage)
    var asint = storage.idata
    C.setUnionF(&storage)
    var asfloat = storage.fdata
    return asint + asfloat
end
-- Stores -3.0 through the float member, masks off bit 31 through the int
-- member, then reads the float member again.
terra foo2()
    var pun : C.testunion
    pun.fdata = -3.0
    pun.idata = pun.idata and not (1 << 31)
    return pun.fdata
end

local test = require("test")
test.eq(foo(),7)
test.eq(foo2(),3)

View File

@@ -1,10 +0,0 @@
-- Attaches explicit file/line debug information to two statements and checks
-- that the function still compiles and runs.
terra foo(c : int, b : int)
    terralib.debuginfo("../src/terralib.lua",300)
    var total = b + c
    terralib.debuginfo("../src/terralib.lua",301)
    return total + total
end

foo(3,4)
--terralib.dumpmodule()

View File

@@ -1,10 +0,0 @@
-- An empty struct whose Lua-side metatable answers every field lookup with 4,
-- even for a name that also exists in A.methods.
struct A {
}

A.metamethods.__luametatable = {
    __index = function(obj,key)
        return 4
    end
}

A.methods.foo = function()
    return 5
end

a = terralib.new(A)
assert(a.foo == 4)

View File

@@ -1,9 +0,0 @@
-- C side: one mutable global and one const global.
C = terralib.includecstring [[
int foo = 4;
const int foo2 = 5;
]]
-- Increments the mutable C global from Terra and returns its new value plus
-- the const global.
terra t()
C.foo = C.foo + 1;
return C.foo + C.foo2
end
-- (4 + 1) + 5
assert(t() == 10)

View File

@@ -1,23 +0,0 @@
-- Install a callback that pretty-prints whatever object the type checker
-- hands over when it reports an error.
terralib.settypeerrordebugcallback(function(offender)
    offender:printpretty()
end)

-- Builds a quote that dereferences `a`.
local function dosomecomplicatedstuff(a)
    return `@a
end

-- Dereferencing an &opaque is expected to fail during compilation.
local ok,message = pcall(function()
    local terra what(a : &opaque)
        var b = [dosomecomplicatedstuff(a)]
        return b
    end
    what:compile()
end)

assert(not ok)
print(message)
assert(message:match("Errors reported during"))

View File

@@ -1,29 +0,0 @@
--- Asserts that `fn` raises an error whose message matches `err`.
-- @param fn  function called with no arguments under pcall
-- @param err Lua pattern the message must match; defaults to
--            "Errors reported during"
-- Raises (blaming the caller) when `fn` succeeds or fails with another message.
function failit(fn,err)
    local expected = err or "Errors reported during"
    local ok,msg = pcall(fn)
    if ok then
        error("failed to fail.",2)
    end
    if not string.match(msg,expected) then
        error("failed wrong: "..msg,2)
    end
end
-- Defining a method on a plain Lua table must be rejected.
failit(function()
local A = {}
terra A:foo() end
end,"expected a struct")
-- No local `A` is declared in this function, so defining A.b has nothing to
-- index into.
failit(function()
terra A.b() end
end,"failed attempting to index field")
-- Same situation with a deeper path.
failit(function()
terra A.b.c() end
end,"failed attempting to index field")
-- A number cannot be indexed to hold a struct definition.
failit(function()
local A = 4
struct A.b {}
end,"failed attempting to index field")

View File

@@ -1,18 +0,0 @@
-- `terra mything` with no body is used here as a declaration ahead of the
-- definition, so mybar can refer to mything before it is defined.
terra mything
terra mybar()
return mything(4)
end
-- The actual definition.
terra mything(a : int)
return a
end
-- Declaring again after the definition (presumably expected to leave the
-- existing definition usable -- the assert below relies on that).
terra mything
terra mybar2()
return mything(4)
end
assert(mybar() == mybar2())

View File

@@ -1,16 +0,0 @@
-- Exercises the `def` language extension from lib/def in its three positions:
-- as an expression, as a statement, and as a local statement.
import "lib/def"
local c = 3
local a = 4
-- Expression form; the parameter `a` shadows the local `a` above (the checks
-- below expect 10 + c).
local b = def(a) a + c
local d = b(10)
-- Statement form.
def e(a) a + c
-- Local statement form.
local def f(a) a + c
print(d, e(10), f(10))
local test = require("test")
test.eq(d,13)
test.eq(e(10),13)
test.eq(f(10),13)

View File

@@ -1,9 +0,0 @@
-- Operator names are supplied as Lua values: a symbol form ("+") and a
-- metamethod-style name ("__sub").
local a = "+"
local b = "__sub"
-- Operand list passed to operator() as a single Lua table.
local ops = {4,5}
-- The assertion below expects this to evaluate to -(3 + (4 + 5)), i.e.
-- "__sub" with one operand acts as negation.
terra foobar()
return operator(b,operator(a,3,operator("+",ops)))
end
assert(foobar() == -12)

View File

@@ -1,111 +0,0 @@
C = terralib.includec("stdio.h")
-- Global counter holding the argument the next call to d() must receive.
cur = global(int)
-- Order checker: every test below numbers its d() calls in the order they are
-- expected to execute. A call with an unexpected number prints a diagnostic
-- and calls error; otherwise the counter advances.
terra d(a : int)
if cur ~= a then
C.printf("found %d while expecting %d\n",a,cur)
error(cur)
end
cur = cur + 1
--C.printf("d%d\n",a)
end
d:setinlined(false)
-- Calls d(a). The second, empty-tuple parameter lets a caller pass the result
-- of another d() call as an argument.
terra side(a : int, b : {})
d(a)
end
-- Expected order 0,1,2,3: the argument d(1) of the deferred call runs at the
-- defer statement, while side(3, ...) itself runs when the function exits.
terra doit()
d(0)
defer side(3,d(1))
d(2)
end
cur:set(0)
doit()
-- Defers inside a repeat loop and a nested do block. Each of the 100
-- iterations is expected to run its body call (odd number) followed by its
-- deferred call (even number), giving 1..200; then 201 from the inner do
-- block, and finally 202 and 203 in reverse order of their defer statements.
terra doit2()
d(0)
var i = 100
defer d(203)
repeat
defer d(2*(100-i)+2)
d(2*(100-i)+1)
i = i - 1
until i == 0
defer d(202)
do defer d(201) end
end
cur:set(0)
doit2()
-- Defers inside a for loop: each iteration's deferred call (1..10) is
-- expected to run at the end of that iteration, and the function-level defer
-- (11) last.
terra doit3()
d(0)
defer d(11)
for i = 0,10 do
defer d(i+1)
end
end
cur:set(0)
doit3()
-- Defers inside if/else branches: a deferred call is expected to run when its
-- branch's block ends (2 after 1; 3 from the else branch), before the code
-- following the if statement (4) and the function-level defer (5).
terra doit4()
d(0)
defer d(5)
if true then
defer d(2)
d(1)
end
if false then
else
defer d(3)
end
d(4)
end
cur:set(0)
doit4()
-- Empty struct used to test a defer that is introduced by a macro.
struct A {
}
-- Macro returning a quote that declares an A, schedules a:free() with defer,
-- and yields the address of the variable. doit5 expects free() to run after
-- the caller has used the pointer.
A.methods.stackalloc = macro(function()
return quote
var a : A
defer a:free()
in
&a
end
end)
-- Expected to be the second checked call in doit5.
terra A:free()
d(1)
--C.printf("freeing A (%p)\n",self)
end
-- Expected to be the first checked call in doit5.
terra A:print()
d(0)
--C.printf("printing A (%p)\n",self)
end
-- Expected order: print (0) while the object is in use, then the free (1)
-- that the stackalloc macro deferred.
terra doit5()
var a = A.stackalloc()
a:print()
end
cur:set(0)
doit5()
-- Defers combined with early return. Both paths are expected to produce
-- 0,1,2: with a == 0 the 1 comes from the defer inside the if block, which
-- runs on return before the function-level defer; otherwise it comes from
-- the plain d(1) call after the if.
terra doit6(a : int)
defer d(2)
d(0)
if a == 0 then
defer d(1)
return 4
end
d(1)
return 3
end
cur:set(0)
doit6(0)
cur:set(0)
doit6(1)

View File

@@ -1,35 +0,0 @@
C = terralib.includec("stdio.h")
-- Global counter holding the argument the next call to d() must receive.
cur = global(int)
-- Order checker: prints a diagnostic and calls error when called with a
-- number other than the expected one; otherwise advances the counter and
-- prints the number.
terra d(a : int)
if cur ~= a then
C.printf("found %d while expecting %d\n",a,cur)
error(cur)
end
cur = cur + 1
C.printf("d%d\n",a)
end
d:setinlined(false)
-- Calls d(a); the empty-tuple second parameter accepts the result of another
-- call. Not referenced by the code visible in this file.
terra side(a : int, b : {})
d(a)
end
-- Defers combined with break. In every iteration the inner block's defer
-- (2*i + 1) is expected to run before the loop body's defer (2*i + 2). At
-- i == 8 the break must still run both pending defers (17, then 18) before
-- control reaches d(19).
terra doit()
d(0)
for i = 0,10 do
defer d(2*i+2)
if true then
defer d(2*i + 1)
if i == 8 then
break
end
end
end
d(19)
end
cur:set(0)
doit()
doit:printpretty()

View File

@@ -1,82 +0,0 @@
-- Global counter incremented by every deferred call in this file.
c = global(int,0)
terra up()
c = c + 1
end
-- A defer wrapped in its own do block inside a loop condition. The condition
-- is evaluated 11 times (a = 0..10), which the assert below checks.
terra foo()
var a = 0
while([quote do defer up() end in a < 10 end]) do
a = a + 1
end
end
foo()
assert(c:get() == 11)
-- The quote sits on the right of `and`; the assert expects only 10 more
-- increments, i.e. it is not evaluated on the final test where a < 10 is false.
terra foo2()
var a = 0
while(a < 10 and [quote do defer up() end in true end]) do
a = a + 1
end
end
foo2()
assert(c:get() == 21)
-- Same short-circuit check as foo2, but with the condition computed in a
-- variable initializer inside the loop body; again 10 increments are expected.
terra foo3()
var a = 0
while true do
var r = a < 10 and [quote do defer up() end in true end]
if not r then break end
a = a + 1
end
end
foo3()
assert(c:get() == 31)
--- Asserts that `fn` raises an error whose message matches `match`.
-- @param match Lua pattern the error message must match
-- @param fn    function called with no arguments under pcall
-- Raises (blaming the caller) when `fn` succeeds or fails with another message.
function failit(match,fn)
    local ok,msg = pcall(fn)
    if ok then
        error("failed to fail.",2)
    end
    if not string.match(msg,match) then
        error("failed wrong: "..msg,2)
    end
end
-- Error message expected whenever a defer appears directly (not wrapped in
-- its own do block) inside a conditionally evaluated expression.
local df = "defer statements are not allowed in conditional expressions"
-- if condition
failit(df,function()
local terra foo()
if [quote defer up() in true end] then end
end
foo()
end)
-- while condition
failit(df,function()
local terra foo()
while [quote defer up() in true end] do end
end
foo()
end)
-- repeat/until condition
failit(df,function()
local terra foo()
repeat until [quote defer up() in true end] do end
end
foo()
end)
-- right operand of a boolean `or`
failit(df,function()
local terra foo()
var a = true or [quote defer up() in true end]
end
foo()
end)
-- left operand of a boolean `and`
failit(df,function()
local terra foo()
var a = [quote defer up() in true end] and true
end
foo()
end)
-- With integer operands the same shape is accepted (presumably because `and`
-- on integers is not a short-circuit boolean operator -- TODO confirm); the
-- deferred up() runs once, which the assert checks.
local terra foo()
var a = [quote defer up() in 1 end] and 2
end
foo()
assert(c:get() == 32)

View File

@@ -1,35 +0,0 @@
C = terralib.includec("stdio.h")
-- Global counter holding the argument the next call to d() must receive.
cur = global(int)
-- Order checker: prints a diagnostic and calls error when called with a
-- number other than the expected one; otherwise advances the counter and
-- prints the number.
terra d(a : int)
if cur ~= a then
C.printf("found %d while expecting %d\n",a,cur)
error(cur)
end
cur = cur + 1
C.printf("d%d\n",a)
end
d:setinlined(false)
-- Defers combined with goto. The numbering expects that:
--  * jumping back to `begin` runs the defer registered after the label, with
--    its argument computed when the defer statement executed (1..10);
--  * the forward jump out of the if block runs only that block's defer (11);
--  * on return the remaining defers run in reverse order: 12, 13, 14.
terra foo()
d(0)
defer d(14)
var a = 0
::begin::
defer d(terralib.select(a == 10,13,a + 1))
if a >= 10 then
defer d(11)
goto theend
end
a = a + 1
goto begin
::theend::
defer d(12)
return a
end
foo:printpretty()
cur:set(0)
test = require("test")
test.eq(foo(),10)

View File

@@ -1,143 +0,0 @@
local IO = terralib.includec("stdio.h")
local stdlib = terralib.includec("stdlib.h")
--- True when `x` has no fractional part.
local function isinteger(x)
    return x == math.floor(x)
end
-- Edge length of one square block.
local NB = 48

-- Straightforward NB x NB block update C = alpha*C + A*B on row-major data.
-- lda/ldb/ldc are the row strides (in elements) of A, B and C.
terra naivel1matmul(A : &double, B : &double, C : &double, lda : int, ldb : int, ldc : int, alpha : double)
    for row = 0, NB do
        for col = 0, NB do
            var out = row*ldc + col
            C[out] = alpha * C[out]
            for inner = 0, NB do
                C[out] = C[out] + A[row*lda + inner] * B[inner*ldb + col]
            end
        end
    end
end
--- Builds a (possibly nested) 0-based table of fresh symbols.
-- With no dimensions it returns symbol(name). Otherwise it returns a table
-- indexed 0..I-1 whose entries are built recursively from the remaining
-- dimensions, with the index appended to the name at every level.
function symmat(name,I,...)
    if I then
        local entries = {}
        for idx = 0,I-1 do
            entries[idx] = symmat(name..tostring(idx),...)
        end
        return entries
    end
    return symbol(name)
end
-- Generates a Terra function computing C = alpha*C + A*B for one NB x NB
-- block, unrolled at generation time into register tiles.
--   NB        block edge length
--   NK        number of k steps unrolled per iteration of the kk loop
--   RM        rows of C per register tile
--   RN        vectors of C per register tile row
--   V         vector width in doubles
--   prefetch  when truthy, each row of B vectors is loaded one k step ahead
-- Requires NB to be divisible by RN*V and by RM (asserted below).
-- Returns terra(A, B, C, lda, ldb, ldc, alpha).
function genl1matmul(NB, NK, RM, RN, V,prefetch)
assert(isinteger(NB / (RN*V)))
assert(isinteger(NB / RM))
local VP = &vector(double,V)
-- Load/store V consecutive doubles starting at data[idx] as one vector.
local terra vecload(data : &double, idx : int)
var addr = &data[idx]
return @VP(addr)
end
local terra vecstore(data : &double, idx : int, v : vector(double,V))
var addr = &data[idx]
@VP(addr) = v
end
-- Symbols shared between the generated quotes and the final function.
-- NOTE(review): the display name "mn" given to `mm` looks like a typo for "mm".
local A,B,C,mm,nn, alpha = symbol("A"),symbol("B"),symbol("C"),symbol("mn"),symbol("nn"),symbol("alpha")
local lda,ldb,ldc = symbol("lda"),symbol("ldb"), symbol("ldc")
-- a[kb][m]: vector of A values for row m; b[k][n]: B vector for step k;
-- c[m][n]: accumulator for the C tile.
local a,b,c = symmat("a",NB/V,RM), symmat("b",NB,RN), symmat("c",RM,RN)
local kk = symbol("kk")
-- loadc scales the current C tile by alpha into the accumulators; storec
-- writes the accumulators back.
local loadc,storec = terralib.newlist(),terralib.newlist()
for m = 0, RM-1 do
for n = 0, RN-1 do
loadc:insert(quote
var [c[m][n]] = alpha * vecload(C,(mm+m)*ldc + nn + n*V)
end)
storec:insert(quote
vecstore(C,(mm+m)*ldc + nn + n*V,[c[m][n]])
end)
end
end
-- calcc holds the unrolled multiply-accumulate statements for NK k steps.
local calcc = terralib.newlist()
for kb = 0, NK/V-1 do
local kbV = kb*V
-- One vector load of A per tile row covers V consecutive k values.
for m = 0, RM-1 do
calcc:insert(quote
var [a[kb][m]] = vecload(A,(mm+m)*lda + kk + kbV)
end)
end
for v = 0, V-1 do
local k = kbV+v
-- Without prefetching every step loads its own B vectors; with prefetching
-- only the very first step does, later ones are loaded one step ahead below.
if not prefetch or (v == 0 and kb == 0) then
for n = 0, RN-1 do
calcc:insert(quote
var [b[k][n]] = vecload(B,(kk + k)*ldb + nn + n*V)
end)
end
end
for m = 0, RM-1 do
for n = 0, RN-1 do
-- Lane v of the A vector multiplies the B vector for step k.
calcc:insert(quote
[c[m][n]] = [c[m][n]] + [a[kb][m]][v] * [b[k][n]]
end)
if prefetch and not (v == V-1 and kb == NK/V-1) and m == RM-1 then --prefetch the next b
calcc:insert(quote
var [b[k+1][n]] = vecload(B,(kk + k + 1)*ldb + nn + n*V)
end)
end
end
end
end
end
-- Final kernel: walk the block in RM x (RN*V) tiles, and for each tile run
-- the unrolled k statements NB/NK times.
return terra([A] : &double, [B] : &double, [C] : &double, [lda] : int, [ldb] : int, [ldc] : int, [alpha] : double)
for [mm] = 0, NB, RM do
for [nn] = 0, NB,RN*V do
[loadc];
for [kk] = 0, NB, NK do
[calcc];
end
[storec];
end
end
end
end
-- Edge length of the outer blocking level: eight inner blocks.
local NB2 = 8 * NB

-- Inner kernel instance: NK = 4, RM = 3, RN = 2, V = 4, no prefetching.
local l1matmul = genl1matmul(NB,4, 3, 2, 4)

-- Smaller of two ints.
terra min(a : int, b : int)
    var smaller = terralib.select(a < b, a, b)
    return smaller
end
-- Two-level blocked matrix multiply built on l1matmul: outer blocks of edge
-- NB2, inner blocks of edge NB. gettime, the alpha argument and beta are not
-- used in the body. The first k block of every C block is called with scale
-- 0 (overwrite C), later ones with scale 1 (accumulate).
-- NOTE(review): l1matmul always processes a full NB x NB block, so this
-- appears to assume M, N and K are multiples of NB -- confirm with callers.
terra my_dgemm(gettime : {} -> double, M : int, N : int, K : int, alpha : double, A : &double, lda : int, B : &double, ldb : int,
beta : double, C : &double, ldc : int)
for mm = 0,M,NB2 do
for nn = 0,N,NB2 do
for kk = 0,K, NB2 do
for m = mm,min(mm+NB2,M),NB do
for n = nn,min(nn+NB2,N),NB do
for k = kk,min(kk+NB2,K),NB do
l1matmul(A + m*lda + k,
B + k*ldb + n,
C + m*ldc + n,
lda,ldb,ldc, terralib.select(k == 0,0,1))
end
end
end
end
end
end
end
-- Emit the routine as an object file exporting the symbol my_dgemm.
terralib.saveobj("my_dgemm.o", {my_dgemm = my_dgemm})

View File

@@ -1,163 +0,0 @@
--- Creates symbol(name), or a nested 0-based table of symbols when dimension
-- sizes are given. Each level appends its index to the symbol name.
function symmat(name,I,...)
    if I == nil or I == false then
        return symbol(name)
    end
    local level = {}
    for pos = 0,I-1 do
        level[pos] = symmat(name..tostring(pos),...)
    end
    return level
end
-- Generates a Terra function computing C = alpha*C + A*B for one NB x NB
-- block stored with row stride NB.
--   NB     block edge length (also used as the stride of A, B and C)
--   RM     rows of C per register tile
--   RN     vectors of C per register tile row
--   V      vector width in doubles
--   alpha  generation-time constant multiplied into the existing C values
-- Returns terra(A : &double, B : &double, C : &double).
function genkernel(NB, RM, RN, V,alpha)
    -- Symbols shared between the generated quotes and the final function.
    -- NOTE(review): the display name "mn" given to `mm` looks like a typo for "mm".
    local A,B,C,mm,nn = symbol("A"),symbol("B"),symbol("C"),symbol("mn"),symbol("nn")
    local lda,ldb,ldc = NB,NB,NB
    local a,b,c,caddr = symmat("a",RM), symmat("b",RN), symmat("c",RM,RN), symmat("caddr",RM,RN)
    local k = symbol("k")
    local loadc,storec = terralib.newlist(),terralib.newlist()
    local VT = vector(double,V)
    local VP = &VT

    -- loadc remembers the address of every C vector in the tile and loads it
    -- scaled by alpha; storec writes the accumulators back to those addresses.
    for m = 0, RM-1 do
        for n = 0, RN-1 do
            loadc:insert(quote
                var [caddr[m][n]] = C + (mm+m)*ldc + nn + n*V
                var [c[m][n]] = alpha * @VP([caddr[m][n]])
            end)
            storec:insert(quote
                @VP([caddr[m][n]]) = [c[m][n]]
            end)
        end
    end

    -- calcc is one k step: load RN vectors of B, convert one scalar of A per
    -- tile row to a vector, then accumulate every a[m] * b[n] product.
    local calcc = terralib.newlist()
    for n = 0, RN-1 do
        calcc:insert(quote
            var [b[n]] = @VP(&B[k*ldb + nn + n*V])
        end)
    end
    for m = 0, RM-1 do
        calcc:insert(quote
            var [a[m]] = VT(A[(mm+m)*lda + k])
        end)
    end
    for m = 0, RM-1 do
        for n = 0, RN-1 do
            calcc:insert(quote
                [c[m][n]] = [c[m][n]] + [a[m]] * [b[n]]
            end)
        end
    end

    -- Walk the block in RM x (RN*V) tiles, running all NB k steps per tile.
    return terra([A] : &double, [B] : &double, [C] : &double)
        for [mm] = 0, NB, RM do
            for [nn] = 0, NB,RN*V do
                [loadc];
                for [k] = 0, NB do
                    [calcc];
                end
                [storec];
            end
        end
    end
end
-- Blocking parameters: inner block edge, outer block edge, vector width.
local NB = 48
local NB2 = 8 * NB
local V = 4

-- Two instances of the block kernel: alpha = 0 overwrites C, alpha = 1
-- accumulates into it.
l1dgemm0 = genkernel(NB,2,4,V,0)
l1dgemm1 = genkernel(NB,2,4,V,1)

-- Smaller of two ints.
terra min(a : int, b : int)
    var smaller = terralib.select(a < b, a, b)
    return smaller
end
local stdlib = terralib.includec("stdlib.h")
local IO = terralib.includec("stdio.h")
-- Pointer-to-vector type used for the block copies below.
local VP = &vector(double,V)
-- Blocked matrix multiply that first repacks A and B into block-contiguous
-- scratch buffers, runs the block kernels on the scratch data, then copies
-- the packed result back into C. gettime, alpha and beta are not used.
-- NOTE(review): the copy loops always move full NB-wide blocks in steps of V,
-- so this appears to assume M, N and K are multiples of NB -- confirm.
-- NOTE(review): the malloc results are used without a nil check.
terra my_dgemm(gettime : {} -> double, M : int, N : int, K : int, alpha : double, A : &double, lda : int, B : &double, ldb : int,
beta : double, C : &double, ldc : int)
var AA = [&double](stdlib.malloc(sizeof(double)*M*K))
var BB = [&double](stdlib.malloc(sizeof(double)*K*N))
var CC = [&double](stdlib.malloc(sizeof(double)*M*N))
-- Pack A block by block: i runs linearly through AA while A is read with
-- stride lda.
var i = 0
for mm = 0,M,NB do
for kk = 0,K,NB do
for m = mm,mm+NB do
for k = kk,kk+NB,V do
@VP(&AA[i]) = @VP(&A[m*lda + k])
i = i + V
end
end
end
end
-- Pack B the same way.
i = 0
for kk = 0,K,NB do
for nn = 0,N,NB do
for k = kk,kk+NB do
for n = nn,nn+NB,V do
@VP(&BB[i]) = @VP(&B[k*ldb + n])
i = i + V
end
end
end
end
-- Two-level blocked multiply over the packed buffers. The first k block of
-- each C block uses the overwriting kernel, later ones the accumulating one.
for mm = 0,M,NB2 do
for nn = 0,N,NB2 do
for kk = 0,K, NB2 do
for m = mm,min(mm+NB2,M),NB do
for n = nn,min(nn+NB2,N),NB do
for k = kk,min(kk+NB2,K),NB do
--IO.printf("%d %d starting at %d\n",m,k,m*lda + NB*k)
if k == 0 then
l1dgemm0(AA + (m*lda + NB*k),
BB + (k*ldb + NB*n),
CC + (m*ldc + NB*n))
else
l1dgemm1(AA + (m*lda + NB*k),
BB + (k*ldb + NB*n),
CC + (m*ldc + NB*n))
end
end
end
end
end
end
end
-- Unpack the block-contiguous result into C (row stride ldc).
i = 0
for mm = 0,M,NB do
for nn = 0,N,NB do
for m = mm,mm+NB do
for n = nn,nn+NB,V do
@VP(&C[m*ldc + n]) = @VP(&CC[i])
i = i + V
end
end
end
end
stdlib.free(AA)
stdlib.free(BB)
stdlib.free(CC)
end
-- Emit the routine as an object file exporting the symbol my_dgemm.
terralib.saveobj("my_dgemm.o", { my_dgemm = my_dgemm })

View File

@@ -1,213 +0,0 @@
--- Returns symbol(name) when no dimension is given; otherwise a table indexed
-- 0..I-1 whose entries are built recursively from the remaining dimensions,
-- each level appending its index to the name.
function symmat(name,I,...)
    if I then
        local slots = {}
        for at = 0,I-1 do
            local label = name..tostring(at)
            slots[at] = symmat(label,...)
        end
        return slots
    end
    return symbol(name)
end
--- True when `x` equals its own floor, i.e. has no fractional part.
local function isinteger(x)
    local whole = math.floor(x)
    return whole == x
end
-- LLVM prefetch intrinsic: address plus three integer arguments.
llvmprefetch = terralib.intrinsic("llvm.prefetch",{&opaque,int,int,int} -> {})

-- Macro: load through `addr` with an explicit 8-byte alignment attribute.
local alignedload = macro(function(addr)
    return `terralib.attrload(addr, { align = 8 })
end)

-- Macro: store `v` through `addr` with an explicit 8-byte alignment attribute.
local alignedstore = macro(function(addr,v)
    return `terralib.attrstore(addr,v, { align = 8 })
end)
-- Generates a Terra function computing C = alpha*C + A*B for one block, with
-- explicit row strides and pointer bumping instead of index arithmetic.
--   NB        block edge length used when `boundary` is false
--   RM        rows of C per register tile
--   RN        vectors of C per register tile row
--   V         vector width in doubles
--   alpha     generation-time constant multiplied into the existing C values
--   boundary  when truthy, the block extents M, N, K become extra int64
--             parameters of the generated function instead of the constant NB
-- Returns terra(A, B, C, lda, ldb, ldc [, M, N, K]).
function genkernel(NB, RM, RN, V,alpha,boundary)
    local M,N,K, boundaryargs
    if boundary then
        M,N,K = symbol(int64,"M"),symbol(int64,"N"),symbol(int64,"K")
        boundaryargs = terralib.newlist({M,N,K})
    else
        boundaryargs = terralib.newlist()
        M,N,K = NB,NB,NB
    end

    -- Symbols shared between the generated quotes and the final function.
    -- NOTE(review): the display name "mn" given to `mm` looks like a typo for "mm".
    local A,B,C,mm,nn = symbol("A"),symbol("B"),symbol("C"),symbol("mn"),symbol("nn")
    local lda,ldb,ldc = symbol("lda"),symbol("ldb"),symbol("ldc")
    local a,b,c,caddr = symmat("a",RM), symmat("b",RN), symmat("c",RM,RN), symmat("caddr",RM,RN)
    local k = symbol("k")
    local loadc,storec = terralib.newlist(),terralib.newlist()
    local VT = vector(double,V)
    local VP = &VT

    -- loadc remembers the address of every C vector in the tile (relative to
    -- the current C pointer) and loads it scaled by alpha; storec writes the
    -- accumulators back to those addresses.
    for m = 0, RM-1 do
        for n = 0, RN-1 do
            loadc:insert(quote
                var [caddr[m][n]] = C + m*ldc + n*V
                var [c[m][n]] = alpha * alignedload(VP([caddr[m][n]]))
            end)
            storec:insert(quote
                alignedstore(VP([caddr[m][n]]),[c[m][n]])
            end)
        end
    end

    -- calcc is one k step relative to the current A and B pointers.
    local calcc = terralib.newlist()
    for n = 0, RN-1 do
        calcc:insert(quote
            var [b[n]] = alignedload(VP(&B[n*V]))
        end)
    end
    for m = 0, RM-1 do
        calcc:insert(quote
            var [a[m]] = VT(A[m*lda])
        end)
    end
    for m = 0, RM-1 do
        for n = 0, RN-1 do
            calcc:insert(quote
                [c[m][n]] = [c[m][n]] + [a[m]] * [b[n]]
            end)
        end
    end

    local result = terra([A] : &double, [B] : &double, [C] : &double, [lda] : int64,[ldb] : int64,[ldc] : int64,[boundaryargs])
        for [mm] = 0, M, RM do
            for [nn] = 0, N,RN*V do
                [loadc];
                for [k] = 0, K do
                    llvmprefetch(B + 4*ldb,0,3,1);
                    [calcc];
                    -- next k: one row down in B, one column right in A
                    B = B + ldb
                    A = A + 1
                end
                [storec];
                -- rewind the k walk and move to the next column tile
                A = A - K
                B = B - ldb*K + RN*V
                C = C + RN*V
            end
            -- Next row tile: C rows are ldc apart (see loadc), so the row
            -- advance uses ldc; then undo the N columns walked above.
            C = C + RM * ldc - N
            B = B - N
            A = A + lda*RM
        end
    end
    return result
end
local stdlib = terralib.includec("stdlib.h")
local IO = terralib.includec("stdio.h")
-- Builds a two-level blocked dgemm for the given tuning parameters.
--   NB      inner block edge length
--   NBF     outer block edge length, as a multiple of NB
--   RM, RN  register tile shape passed to genkernel
--   V       vector width in doubles
-- Returns false when the register tile does not divide the block evenly,
-- otherwise the generated Terra function.
function generatedgemm(NB,NBF,RM,RN,V)
    local tiles_evenly = isinteger(NB/(RN*V)) and isinteger(NB/RM)
    if not tiles_evenly then
        return false
    end

    local NB2 = NBF * NB

    -- Full-block kernels (alpha 0 overwrites C, alpha 1 accumulates) and their
    -- scalar boundary counterparts taking explicit extents.
    local l1dgemm0 = genkernel(NB,RM,RN,V,0,false)
    local l1dgemm1 = genkernel(NB,RM,RN,V,1,false)
    local l1dgemm0b = genkernel(NB,1,1,1,0,true)
    local l1dgemm1b = genkernel(NB,1,1,1,1,true)

    local terra min(a : int, b : int)
        return terralib.select(a < b, a, b)
    end

    return terra(gettime : {} -> double, M : int, N : int, K : int, alpha : double, A : &double, lda : int, B : &double, ldb : int,
                 beta : double, C : &double, ldc : int)
        for mm = 0,M,NB2 do
            for nn = 0,N,NB2 do
                for kk = 0,K, NB2 do
                    for m = mm,min(mm+NB2,M),NB do
                        for n = nn,min(nn+NB2,N),NB do
                            for k = kk,min(kk+NB2,K),NB do
                                -- extents of this block, clipped at the matrix edges
                                var rows = min(M-m,NB)
                                var cols = min(N-n,NB)
                                var depth = min(K-k,NB)
                                var isboundary = rows < NB or cols < NB or depth < NB
                                var Ablk = A + (m*lda + k)
                                var Bblk = B + (k*ldb + n)
                                var Cblk = C + (m*ldc + n)
                                if isboundary then
                                    if k == 0 then
                                        l1dgemm0b(Ablk,Bblk,Cblk,lda,ldb,ldc,rows,cols,depth)
                                    else
                                        l1dgemm1b(Ablk,Bblk,Cblk,lda,ldb,ldc,rows,cols,depth)
                                    end
                                else
                                    if k == 0 then
                                        l1dgemm0(Ablk,Bblk,Cblk,lda,ldb,ldc)
                                    else
                                        l1dgemm1(Ablk,Bblk,Cblk,lda,ldb,ldc)
                                    end
                                end
                            end
                        end
                    end
                end
            end
        end
    end
end
--
-- Candidate values searched by the (currently disabled) auto-tuner below.
local blocksizes = {16,24,32,40,48,56,64}
local regblocks = {1,2,4}
local vectors = {1,2,4,8}
--local best = { gflops = 0, b = 56, rm = 4, rn = 1, v = 8 }
-- Parameters used when the tuner does not run; gflops = 0 lets any measured
-- configuration replace it.
local best = { gflops = 0, b = 40, rm = 4, rn = 2, v = 4 }
-- Exhaustive search over all parameter combinations; switched off with
-- `if false`, so the hard-coded `best` above is what gets compiled.
if false then
local tunefor = 1024
local harness = require("lib/matrixtestharness")
for _,b in ipairs(blocksizes) do
for _,rm in ipairs(regblocks) do
for _,rn in ipairs(regblocks) do
for _,v in ipairs(vectors) do
-- generatedgemm returns false for combinations that do not tile the block.
local my_dgemm = generatedgemm(b,5,rm,rn,v)
if my_dgemm then
print(b,rm,rn,v)
my_dgemm:compile()
-- Largest multiple of the block size not exceeding tunefor.
local i = math.floor(tunefor / b) * b
-- NOTE(review): this `avg` is shadowed by the second `local avg` below and
-- never read.
local avg = 0
local s, times = harness.timefunctions("double",i,i,i,function(M,K,N,A,B,C)
my_dgemm(nil,M,N,K,1.0,A,K,B,N,0.0,C,N)
end)
if not s then
print("<error>")
-- NOTE(review): this only leaves the innermost (vector width) loop.
break
end
print(i,unpack(times))
local avg = times[1]
if best.gflops < avg then
best = { gflops = avg, b = b, rm = rm, rn = rn, v = v }
terralib.tree.printraw(best)
end
end
end
end
end
end
end
terralib.tree.printraw(best)
-- Generate the routine for the selected parameters and emit it as an object
-- file exporting the symbol my_dgemm.
local my_dgemm = generatedgemm(best.b, 5, best.rm, best.rn, best.v)
--my_dgemm:disas()
terralib.saveobj("my_dgemm.o", { my_dgemm = my_dgemm })

View File

@@ -1,75 +0,0 @@
--- Symbol-matrix helper: symbol(name) for zero dimensions, otherwise a nested
-- 0-based table with one level per dimension and indices appended to the name.
function symmat(name,I,...)
    if not I then
        return symbol(name)
    end
    local row = {}
    local last = I-1
    for j = 0,last do
        row[j] = symmat(name..tostring(j),...)
    end
    return row
end
-- LLVM prefetch intrinsic: address plus three integer arguments.
prefetch = terralib.intrinsic("llvm.prefetch",{&opaque,int,int,int} -> {})

-- Generates a Terra function computing C = alpha*C + A*B for one NB x NB
-- block, with explicit row strides and pointer bumping.
--   NB     block edge length
--   RM     rows of C per register tile
--   RN     vectors of C per register tile row
--   V      vector width in doubles
--   alpha  generation-time constant multiplied into the existing C values
-- Returns terra(A, B, C, lda, ldb, ldc).
function genkernel(NB, RM, RN, V,alpha)
    -- Symbols shared between the generated quotes and the final function.
    -- NOTE(review): the display name "mn" given to `mm` looks like a typo for "mm".
    local A,B,C = symbol("A"),symbol("B"),symbol("C")
    local mm,nn = symbol("mn"),symbol("nn")
    local lda,ldb,ldc = symbol("lda"),symbol("ldb"),symbol("ldc")
    local a,b = symmat("a",RM), symmat("b",RN)
    local c,caddr = symmat("c",RM,RN), symmat("caddr",RM,RN)
    local k = symbol("k")

    local loadc,storec = terralib.newlist(),terralib.newlist()
    local VT = vector(double,V)
    local VP = &VT

    -- loadc remembers the address of every C vector in the tile (relative to
    -- the current C pointer) and loads it scaled by alpha; storec writes the
    -- accumulators back to those addresses.
    for m = 0, RM-1 do for n = 0, RN-1 do
        loadc:insert(quote
            var [caddr[m][n]] = C + m*ldc + n*V
            var [c[m][n]] =
              alpha * @VP([caddr[m][n]])
        end)
        storec:insert(quote
            @VP([caddr[m][n]]) = [c[m][n]]
        end)
    end end

    -- calcc is one k step relative to the current A and B pointers.
    local calcc = terralib.newlist()
    for n = 0, RN-1 do
        calcc:insert(quote
            var [b[n]] = @VP(&B[n*V])
        end)
    end
    for m = 0, RM-1 do
        calcc:insert(quote
            var [a[m]] = VT(A[m*lda])
        end)
    end
    for m = 0, RM-1 do for n = 0, RN-1 do
        calcc:insert(quote
            [c[m][n]] = [c[m][n]] + [a[m]] * [b[n]]
        end)
    end end

    return terra([A] : &double, [B] : &double, [C] : &double,
                 [lda] : int64,[ldb] : int64,[ldc] : int64)
        for [mm] = 0, NB, RM do
            for [nn] = 0, NB, RN*V do
                [loadc];
                for [k] = 0, NB do
                    prefetch(B + 4*ldb,0,3,1);
                    [calcc];
                    -- next k: one row down in B, one column right in A
                    B,A = B + ldb,A + 1
                end
                [storec];
                -- rewind the k walk and move to the next column tile
                A,B,C = A - NB,B - ldb*NB + RN*V,C + RN*V
            end
            -- Next row tile: C rows are ldc apart (see loadc), so the row
            -- advance of C uses ldc; then undo the NB columns walked above.
            A,B,C = A + lda*RM, B - NB, C + RM * ldc - NB
        end
    end
end
-- Instantiate one kernel (NB = 40, RM = 4, RN = 2, V = 8, alpha = 1), compile
-- it and print the generated code.
local a = genkernel(40,4,2,8,1)
a:compile()
a:printpretty()

-- Vector a*x + y on 4-wide float vectors with a scalar `a`.
terra short_saxpy(a : float,
                  x : vector(float,4), y : vector(float,4))
    var scaled = a*x
    return scaled + y
end

short_saxpy:printpretty()

Some files were not shown because too many files have changed in this diff Show More