// Mirror of https://github.com/KevinMidboe/linguist.git
// (synced 2025-10-29 17:50:22 +00:00).
// Mirrored listing: 46 lines, 1.0 KiB, displayed as Plaintext.
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
/**
 * CUDA Kernel Device code
 *
 * Computes the element-wise vector addition of A and B into C:
 * C[i] = A[i] + B[i] for every 0 <= i < numElements. The 3 vectors have the
 * same number of elements numElements.
 *
 * Launch expectations:
 *  - Only the x components of the grid and block are used for indexing, so
 *    the kernel is meant for a 1D launch.
 *  - Any grid size is valid: each thread starts at its global index and then
 *    advances by the total number of launched threads, so every element is
 *    written even when the grid holds fewer threads than elements. With at
 *    least numElements threads each thread handles at most one element.
 *  - No shared memory is used.
 *
 * @param A            first input vector, readable from the device,
 *                     at least numElements floats
 * @param B            second input vector, same requirements as A
 * @param C            output vector, writable from the device,
 *                     at least numElements floats
 * @param numElements  number of elements to process; values <= 0 make the
 *                     kernel a no-op
 */
__global__ void
vectorAdd(const float *A, const float *B, float *C, int numElements)
{
    // A non-positive count means there is nothing to do; clamping here keeps
    // the unsigned loop bound below from wrapping around.
    const size_t count = numElements > 0 ? (size_t)numElements : 0;

    // Total number of threads along x. Widened to size_t before multiplying
    // so the product cannot overflow 32-bit arithmetic.
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        C[i] = A[i] + B[i];
    }
}
/**
 * Abort the program with a diagnostic if a CUDA runtime call failed.
 *
 * When err is not cudaSuccess, prints
 * "Failed to <action> (error code <reason>)!" to stderr and exits with
 * EXIT_FAILURE. Does nothing when err is cudaSuccess.
 *
 * @param err     status returned by a CUDA runtime call
 * @param action  short description of what was attempted, used in the message
 */
static void
checkCuda(cudaError_t err, const char *action)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to %s (error code %s)!\n", action, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host main routine
 *
 * Allocates and fills two input vectors on the host, copies them to the
 * device, launches vectorAdd, copies the result back, verifies it and
 * releases all resources.
 *
 * NOTE(review): the original listing used numElements, d_A, d_B and d_C
 * without declaring them anywhere in the file. The vector length and the
 * input values below were chosen for this sample; adjust as needed.
 *
 * @return 0 on success; the process exits with EXIT_FAILURE on any error.
 */
int
main(void)
{
    const int numElements = 50000;
    const size_t size = numElements * sizeof(float);

    // Host vectors
    float *h_A = (float *)malloc(size);
    float *h_B = (float *)malloc(size);
    float *h_C = (float *)malloc(size);

    if (h_A == NULL || h_B == NULL || h_C == NULL)
    {
        fprintf(stderr, "Failed to allocate host vectors!\n");
        exit(EXIT_FAILURE);
    }

    // Small integers are exactly representable as float, so the device sum
    // can be compared against the host sum for exact equality below.
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = (float)i;
        h_B[i] = 2.0f * (float)i;
    }

    // Device vectors
    float *d_A = NULL;
    float *d_B = NULL;
    float *d_C = NULL;
    checkCuda(cudaMalloc((void **)&d_A, size), "allocate device vector A");
    checkCuda(cudaMalloc((void **)&d_B, size), "allocate device vector B");
    checkCuda(cudaMalloc((void **)&d_C, size), "allocate device vector C");

    checkCuda(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice),
              "copy vector A from host to device");
    checkCuda(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice),
              "copy vector B from host to device");

    // Launch the Vector Add CUDA Kernel
    int threadsPerBlock = 256;
    // Round up so that a partially filled last block covers the tail.
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);

    // A launch does not return a status; configuration errors are reported
    // through cudaGetLastError().
    checkCuda(cudaGetLastError(), "launch vectorAdd kernel");

    // This blocking copy waits for the kernel to finish, so errors raised
    // while the kernel was executing are reported here.
    checkCuda(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost),
              "copy vector C from device to host");

    // Verify the result on the host
    for (int i = 0; i < numElements; ++i)
    {
        if (h_C[i] != h_A[i] + h_B[i])
        {
            fprintf(stderr, "Result verification failed at element %d!\n", i);
            exit(EXIT_FAILURE);
        }
    }

    checkCuda(cudaFree(d_A), "free device vector A");
    checkCuda(cudaFree(d_B), "free device vector B");
    checkCuda(cudaFree(d_C), "free device vector C");

    free(h_A);
    free(h_B);
    free(h_C);

    // Reset the device and exit
    checkCuda(cudaDeviceReset(), "deinitialize the device");

    return 0;
}