mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
added cuda lexer and removed example from c++ samples
This commit is contained in:
46
samples/Cuda/vectorAdd.cu
Normal file
46
samples/Cuda/vectorAdd.cu
Normal file
@@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
/**
|
||||
* CUDA Kernel Device code
|
||||
*
|
||||
* Computes the vector addition of A and B into C. The 3 vectors have the same
|
||||
* number of elements numElements.
|
||||
*/
|
||||
__global__ void
|
||||
vectorAdd(const float *A, const float *B, float *C, int numElements)
|
||||
{
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (i < numElements)
|
||||
{
|
||||
C[i] = A[i] + B[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Host main routine
|
||||
*/
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
// Error code to check return values for CUDA calls
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
// Launch the Vector Add CUDA Kernel
|
||||
int threadsPerBlock = 256;
|
||||
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
|
||||
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n", cudaGetErrorString(err));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Reset the device and exit
|
||||
err = cudaDeviceReset();
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user