mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			46 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			46 lines
		
	
	
		
			1.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
 | |
| 
 | |
/**
 * CUDA kernel: element-wise vector addition, C = A + B.
 *
 * Each thread handles at most one element, selected by its global x index
 * (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is at or
 * beyond numElements do nothing, so a launch may cover more threads than
 * there are elements.
 *
 * A, B         input vectors (read only)
 * C            output vector
 * numElements  number of elements in each of the three vectors
 */
__global__ void
vectorAdd(const float *A, const float *B, float *C, int numElements)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Surplus threads past the end of the vectors have no work to do.
    if (idx >= numElements)
    {
        return;
    }

    C[idx] = A[idx] + B[idx];
}
 | |
| 
 | |
/**
 * Terminate the program with a diagnostic if a CUDA runtime call failed.
 *
 * err     status returned by the CUDA runtime call
 * action  short description of what was attempted; it is inserted into the
 *         message "Failed to <action> (error code <reason>)!"
 */
static void
checkCuda(cudaError_t err, const char *action)
{
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to %s (error code %s)!\n", action, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host main routine
 *
 * Allocates three vectors on the host and on the device, fills the two
 * inputs, launches vectorAdd on the device, copies the result back and
 * checks it against a host-side sum. Every CUDA runtime call is checked.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any allocation, CUDA
 * or verification error.
 */
int
main(void)
{
    const int numElements = 50000;
    const size_t size = numElements * sizeof(float);

    // Host-side input and output vectors
    float *h_A = (float *)malloc(size);
    float *h_B = (float *)malloc(size);
    float *h_C = (float *)malloc(size);

    if (h_A == NULL || h_B == NULL || h_C == NULL)
    {
        fprintf(stderr, "Failed to allocate host vectors!\n");
        free(h_A);
        free(h_B);
        free(h_C);
        exit(EXIT_FAILURE);
    }

    // Deterministic inputs; every value and every sum is exactly
    // representable as a float for this numElements.
    for (int i = 0; i < numElements; ++i)
    {
        h_A[i] = (float)i;
        h_B[i] = 2.0f * (float)i;
    }

    // Device-side vectors
    float *d_A = NULL;
    float *d_B = NULL;
    float *d_C = NULL;
    checkCuda(cudaMalloc((void **)&d_A, size), "allocate device vector A");
    checkCuda(cudaMalloc((void **)&d_B, size), "allocate device vector B");
    checkCuda(cudaMalloc((void **)&d_C, size), "allocate device vector C");

    checkCuda(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice),
              "copy vector A from host to device");
    checkCuda(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice),
              "copy vector B from host to device");

    // Launch the Vector Add CUDA Kernel; the block count is rounded up so
    // that every element is covered by a thread.
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);

    // cudaGetLastError reports launch-configuration errors only; the launch
    // itself is asynchronous, so synchronize to surface execution errors.
    checkCuda(cudaGetLastError(), "launch vectorAdd kernel");
    checkCuda(cudaDeviceSynchronize(), "execute vectorAdd kernel");

    checkCuda(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost),
              "copy vector C from device to host");

    // Verify the device result against a host-side reference
    for (int i = 0; i < numElements; ++i)
    {
        const float diff = h_C[i] - (h_A[i] + h_B[i]);

        if (diff > 1e-5f || diff < -1e-5f)
        {
            fprintf(stderr, "Result verification failed at element %d!\n", i);
            exit(EXIT_FAILURE);
        }
    }

    checkCuda(cudaFree(d_A), "free device vector A");
    checkCuda(cudaFree(d_B), "free device vector B");
    checkCuda(cudaFree(d_C), "free device vector C");

    free(h_A);
    free(h_B);
    free(h_C);

    // Reset the device and exit
    checkCuda(cudaDeviceReset(), "deinitialize the device");

    return 0;
}