mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
samples: add Terra samples
source: https://github.com/zdevito/terra/tree/master/tests
This commit is contained in:
56
samples/Terra/cudaagg.t
Normal file
56
samples/Terra/cudaagg.t
Normal file
@@ -0,0 +1,56 @@
|
||||
if not terralib.cudacompile then
|
||||
print("CUDA not enabled, not performing test...")
|
||||
return
|
||||
end
|
||||
|
||||
local tid = cudalib.nvvm_read_ptx_sreg_tid_x--terralib.intrinsic("llvm.nvvm.read.ptx.sreg.tid.x",{} -> int)
|
||||
local ntid = cudalib.nvvm_read_ptx_sreg_ntid_x -- terralib.intrinsic("llvm.nvvm.read.ptx.sreg.ntid.x",{} -> int)
|
||||
|
||||
|
||||
struct OneFloat {
|
||||
a : int;
|
||||
}
|
||||
|
||||
fn = terra(result : &float, bar : int[5])
|
||||
result[tid()] = bar[0] + bar[1] + bar[2] + bar[3] + bar[4]
|
||||
end
|
||||
fn:setinlined(false)
|
||||
|
||||
--our very simple cuda kernel
|
||||
--more work needs to be done to expose the right CUDA intrinsics
|
||||
--to do more complicated things
|
||||
foo = terra(result : &float)
|
||||
fn(result, array(tid(),tid()+1,tid()+2,tid()+3,tid()+4) )
|
||||
end
|
||||
|
||||
terralib.includepath = terralib.includepath..";/usr/local/cuda/include"
|
||||
|
||||
local C = terralib.includecstring [[
|
||||
#include "cuda_runtime.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
]]
|
||||
local R = terralib.cudacompile({ bar = foo })
|
||||
|
||||
terra doit(N : int)
|
||||
var data : &float
|
||||
C.cudaMalloc([&&opaque](&data),sizeof(float)*N)
|
||||
var launch = terralib.CUDAParams { 1,1,1, N,1,1, 0, nil }
|
||||
R.bar(&launch,data)
|
||||
var results : &float = [&float](C.malloc(sizeof(float)*N))
|
||||
C.cudaMemcpy(results,data,sizeof(float)*N,2)
|
||||
var result = 0.f
|
||||
for i = 0,N do
|
||||
result = result + results[i]
|
||||
end
|
||||
return result
|
||||
end
|
||||
|
||||
local test = require("test")
|
||||
local N = 16
|
||||
|
||||
function s(n) return (n - 1) * n / 2 end
|
||||
function ex(i) return s(N+i) - s(i) end
|
||||
local expected = ex(0) + ex(1) + ex(2) + ex(3) + ex(4)
|
||||
test.eq(doit(N),expected)
|
||||
|
||||
Reference in New Issue
Block a user