/*
 * Mirror of https://github.com/KevinMidboe/linguist.git
 * (synced 2025-10-29 01:30:22 +00:00; listed there as 46 lines, 1.0 KiB, Plaintext).
 */
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
/**
 * CUDA Kernel Device code
 *
 * Computes the element-wise vector addition of A and B into C, i.e.
 * C[i] = A[i] + B[i] for every i in [0, numElements).
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks. Any grid
 * size is valid: each thread starts at its global index and advances by the
 * total number of launched threads, so a grid with fewer threads than
 * elements still covers the whole range, and surplus threads do nothing.
 * No shared memory is used.
 *
 * Preconditions: A, B and C must be dereferenceable from device code and
 * hold at least numElements floats each. A non-positive numElements results
 * in no memory accesses.
 */
__global__ void
vectorAdd(const float *A, const float *B, float *C, int numElements)
{
    // Index math is done in 64 bits: the 32-bit product blockDim.x * blockIdx.x
    // narrowed to int can wrap to a negative value in very large grids, which
    // would slip past the bounds check and index out of range.
    const long long stride = (long long)gridDim.x * blockDim.x;

    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < numElements;
         i += stride)
    {
        C[i] = A[i] + B[i];
    }
}
/**
 * Reports a failed CUDA runtime call and terminates the program.
 *
 * Does nothing when err is cudaSuccess. Otherwise prints
 * "<what> (error code <description>)!" to stderr and exits with EXIT_FAILURE.
 */
static void
checkCuda(cudaError_t err, const char *what)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "%s (error code %s)!\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host main routine
 *
 * Builds two input vectors on the host, copies them to the device, launches
 * vectorAdd, copies the result back and checks it against a host-computed
 * sum. Every CUDA runtime call is checked; any failure, or a result
 * mismatch, terminates the program with EXIT_FAILURE. Returns 0 on success.
 */
int
main(void)
{
    // Problem size and device buffers. The launch below needs numElements,
    // d_A, d_B and d_C; they are declared here so the program is
    // self-contained. The element count is an arbitrary demo size that is
    // deliberately not a multiple of the block size.
    const int numElements = 50000;
    const size_t size = (size_t)numElements * sizeof(float);

    // Host vectors
    float *h_A = (float *)malloc(size);
    float *h_B = (float *)malloc(size);
    float *h_C = (float *)malloc(size);

    if (h_A == NULL || h_B == NULL || h_C == NULL)
    {
        fprintf(stderr, "Failed to allocate host vectors!\n");
        exit(EXIT_FAILURE);
    }

    // Small integer-valued inputs: the expected sums are exactly
    // representable in float, so verification is not sensitive to rounding.
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = (float)i;
        h_B[i] = 2.0f * (float)i;
    }

    // Device vectors
    float *d_A = NULL;
    float *d_B = NULL;
    float *d_C = NULL;

    checkCuda(cudaMalloc((void **)&d_A, size), "Failed to allocate device vector A");
    checkCuda(cudaMalloc((void **)&d_B, size), "Failed to allocate device vector B");
    checkCuda(cudaMalloc((void **)&d_C, size), "Failed to allocate device vector C");

    checkCuda(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice),
              "Failed to copy vector A from host to device");
    checkCuda(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice),
              "Failed to copy vector B from host to device");

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);

    // A launch does not return a status; configuration errors are reported
    // through cudaGetLastError().
    checkCuda(cudaGetLastError(), "Failed to launch vectorAdd kernel");

    // The blocking copy waits for the kernel to finish, so errors raised
    // while the kernel was executing are reported here.
    checkCuda(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost),
              "Failed to copy vector C from device to host");

    // Verify the result against the host-computed sum
    for (int i = 0; i < numElements; ++i)
    {
        const float diff = h_C[i] - (h_A[i] + h_B[i]);

        if (diff < -1e-5f || diff > 1e-5f)
        {
            fprintf(stderr, "Result verification failed at element %d!\n", i);
            exit(EXIT_FAILURE);
        }
    }

    // Release device and host memory
    checkCuda(cudaFree(d_A), "Failed to free device vector A");
    checkCuda(cudaFree(d_B), "Failed to free device vector B");
    checkCuda(cudaFree(d_C), "Failed to free device vector C");

    free(h_A);
    free(h_B);
    free(h_C);

    // Reset the device and exit; the status is checked rather than discarded
    checkCuda(cudaDeviceReset(), "Failed to deinitialize the device");

    return 0;
}