don't know
This commit is contained in:
47
src/Cuda/VectorOperations.cu
Normal file
47
src/Cuda/VectorOperations.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "./VectorOperations.h"

#include <stdexcept>
#include <string>
|
||||
|
||||
// CUDA kernel: element-wise vector addition, ret[i] = a[i] + b[i] for 0 <= i < n.
//
// a, b   - device pointers to the input vectors, at least n floats each
// ret    - device pointer to the output vector, at least n floats
// n      - number of elements to process; nothing is written when n <= 0
//
// Expects a 1D launch. Each thread starts at its global index and advances by
// the total number of launched threads, so every element in [0, n) is handled
// exactly once regardless of the grid/block size chosen by the caller.
__global__ void __ADD(float *a, float *b, float *ret, int n)
{
    // 64-bit indices so that neither the global thread ID nor the stride
    // increment can overflow for large n or large grids.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    // The loop condition doubles as the out-of-bounds guard.
    for (long long i = first; i < n; i += stride)
    {
        ret[i] = a[i] + b[i];
    }
}
|
||||
|
||||
// Allocates the three device buffers (clientA, clientB, clientC), each large
// enough to hold `size` floats.
//
// size - capacity of every buffer, in elements; must not be negative
//
// Throws std::invalid_argument when size is negative and std::runtime_error
// when a device allocation fails. Buffers that were already allocated are
// released before throwing, because the destructor does not run for an object
// whose constructor did not complete.
Shin::Cuda::VectorOperations::VectorOperations(int size)
{
    if (size < 0)
    {
        throw std::invalid_argument("Shin::Cuda::VectorOperations: size must not be negative");
    }

    const size_t bytes = sizeof(float) * static_cast<size_t>(size);

    cudaError_t status = cudaMalloc(&clientA, bytes);
    if (status == cudaSuccess)
    {
        status = cudaMalloc(&clientB, bytes);
        if (status == cudaSuccess)
        {
            status = cudaMalloc(&clientC, bytes);
            if (status == cudaSuccess)
            {
                return;
            }
            // clientC failed: undo clientB, then fall through to undo clientA
            cudaFree(clientB);
        }
        cudaFree(clientA);
    }

    throw std::runtime_error(std::string("Shin::Cuda::VectorOperations: device allocation failed: ")
                             + cudaGetErrorString(status));
}
|
||||
|
||||
// Releases the device buffers held in clientA, clientB and clientC, in that
// order. Return codes of cudaFree are not inspected.
Shin::Cuda::VectorOperations::~VectorOperations()
{
    void *const deviceBuffers[] = { clientA, clientB, clientC };
    const int bufferCount = sizeof(deviceBuffers) / sizeof(deviceBuffers[0]);

    for (int slot = 0; slot < bufferCount; ++slot)
    {
        cudaFree(deviceBuffers[slot]);
    }
}
|
||||
|
||||
// Throws std::runtime_error naming the failed step when a CUDA runtime call
// issued by VectorOperations::add did not succeed.
static void throwOnVectorAddFailure(cudaError_t status, const char *step)
{
    if (status != cudaSuccess)
    {
        throw std::runtime_error(std::string("Shin::Cuda::VectorOperations::add: ") + step
                                 + " failed: " + cudaGetErrorString(status));
    }
}

// Uploads the first n floats of a and b to the device buffers clientA and
// clientB, runs the __ADD kernel over n elements with clientC as output, and
// copies the n results back into ret.
//
// a, b - host pointers to the inputs, at least n floats each
// ret  - host pointer receiving n floats of output; its previous contents are
//        not uploaded because the kernel writes every one of the n outputs
// n    - number of elements; the call is a no-op when n <= 0
//
// NOTE(review): n presumably must not exceed the size passed to the
// constructor; the capacity is not visible here, so this is not checked.
//
// Throws std::runtime_error when a copy or the kernel launch fails.
void Shin::Cuda::VectorOperations::add(float *a, float *b, float *ret, int n)
{
    // A zero-sized grid is an invalid launch configuration, so bail out early.
    if (n <= 0)
    {
        return;
    }

    const size_t bytes = static_cast<size_t>(n) * sizeof(float);

    throwOnVectorAddFailure(cudaMemcpy(clientA, a, bytes, cudaMemcpyHostToDevice),
                            "copy of first operand to device");
    throwOnVectorAddFailure(cudaMemcpy(clientB, b, bytes, cudaMemcpyHostToDevice),
                            "copy of second operand to device");

    // Integer ceil-division written so that it cannot overflow for n near INT_MAX.
    const int blockSize = 256;
    const int gridSize = n / blockSize + (n % blockSize != 0 ? 1 : 0);

    __ADD<<<gridSize, blockSize>>>(clientA, clientB, clientC, n);
    // Launch-configuration errors are only visible through cudaGetLastError().
    throwOnVectorAddFailure(cudaGetLastError(), "kernel launch");

    // Blocking copy: waits for the kernel and reports any fault raised while it ran.
    throwOnVectorAddFailure(cudaMemcpy(ret, clientC, bytes, cudaMemcpyDeviceToHost),
                            "copy of result to host");
}
|
||||
Reference in New Issue
Block a user