don't know

This commit is contained in:
2014-12-10 16:01:53 +01:00
parent 993b4d3f04
commit aab9a073e9
35 changed files with 725 additions and 100 deletions

View File

@@ -0,0 +1,47 @@
#include "./VectorOperations.h"

#include <cstdio>
// CUDA kernel: element-wise vector addition, ret[i] = a[i] + b[i] for 0 <= i < n.
// Each thread takes care of one element of ret.
//
// Launch layout: 1-D grid of 1-D blocks; gridDim.x * blockDim.x must be at
// least n. Threads whose index falls past n exit without touching memory.
// a, b and ret must be device-accessible pointers to at least n floats each.
//
// NOTE(review): the previous body accumulated a[n] * b[n] n times, which read
// one element past the end of both inputs and stored the same value in every
// output slot. Element-wise addition is assumed to be the intent, based on the
// kernel and method names -- confirm.
__global__ void __ADD(float *a, float *b, float *ret, int n)
{
    // Get our global thread ID
    int id = blockIdx.x * blockDim.x + threadIdx.x;

    // Make sure we do not go out of bounds
    if (id >= n)
        return;

    ret[id] = a[id] + b[id];
}
// Allocates the three device buffers (clientA, clientB, clientC), each large
// enough for `size` floats.
//
// A non-positive size allocates nothing. If any allocation fails, a message is
// written to stderr, buffers obtained so far are released, and all three
// pointers are left null so the destructor's cudaFree calls remain harmless.
Shin::Cuda::VectorOperations::VectorOperations(int size)
{
    // Start from a known state: the members may otherwise be indeterminate
    // when an allocation fails part-way through.
    clientA = 0;
    clientB = 0;
    clientC = 0;

    // A negative size would wrap to an enormous size_t in the byte count.
    if (size <= 0)
        return;

    const size_t bytes = sizeof(float) * static_cast<size_t>(size);

    cudaError_t status = cudaMalloc(&clientA, bytes);
    if (status != cudaSuccess)
        clientA = 0; // do not rely on the out-parameter after a failure

    if (status == cudaSuccess)
    {
        status = cudaMalloc(&clientB, bytes);
        if (status != cudaSuccess)
            clientB = 0;
    }

    if (status == cudaSuccess)
    {
        status = cudaMalloc(&clientC, bytes);
        if (status != cudaSuccess)
            clientC = 0;
    }

    if (status != cudaSuccess)
    {
        std::fprintf(stderr,
                     "VectorOperations: device allocation for %d floats failed: %s\n",
                     size, cudaGetErrorString(status));

        // Roll back whatever was allocated; cudaFree on a null pointer is a no-op.
        cudaFree(clientA);
        cudaFree(clientB);
        cudaFree(clientC);
        clientA = 0;
        clientB = 0;
        clientC = 0;
    }
}
// Releases the three device buffers obtained in the constructor.
Shin::Cuda::VectorOperations::~VectorOperations()
{
    // Walk the buffers in A, B, C order and hand each one back to the runtime.
    void *deviceBuffers[] = { clientA, clientB, clientC };
    for (int slot = 0; slot < 3; ++slot)
    {
        cudaFree(deviceBuffers[slot]);
    }
}
// Reports a failed CUDA runtime call made by VectorOperations::add on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool vectorAddStepSucceeded(cudaError_t status, const char *step)
{
    if (status == cudaSuccess)
        return true;

    std::fprintf(stderr, "VectorOperations::add: %s failed: %s\n",
                 step, cudaGetErrorString(status));
    return false;
}

// Runs the __ADD kernel over the first n elements of the host arrays a and b
// and copies the n results back into the host array ret.
//
// a, b   host input arrays holding at least n floats each
// ret    host output array with room for at least n floats
// n      element count; nothing happens when n <= 0
//
// n must not exceed the size given to the constructor: the device buffers are
// only that large, and the size is not available here to be checked.
// Any CUDA failure is reported on stderr and the function returns early,
// leaving ret untouched.
void Shin::Cuda::VectorOperations::add(float *a, float *b, float *ret, int n)
{
    // A negative n would wrap to an enormous byte count below.
    if (n <= 0)
        return;

    const size_t bytes = static_cast<size_t>(n) * sizeof(float);

    // Upload only the inputs. The kernel writes every one of the n outputs,
    // so the previous contents of ret never need to reach the device.
    if (!vectorAddStepSucceeded(
            cudaMemcpy(clientA, a, bytes, cudaMemcpyHostToDevice),
            "host-to-device copy of a"))
        return;
    if (!vectorAddStepSucceeded(
            cudaMemcpy(clientB, b, bytes, cudaMemcpyHostToDevice),
            "host-to-device copy of b"))
        return;

    // One thread per element, spread over as many blocks as needed. A single
    // block of n threads fails to launch once n exceeds the per-block limit.
    const int blockSize = 256;
    // Ceil-division written so that it cannot overflow for n close to INT_MAX.
    const int gridSize = n / blockSize + (n % blockSize != 0 ? 1 : 0);

    __ADD<<<gridSize, blockSize>>>(clientA, clientB, clientC, n);
    if (!vectorAddStepSucceeded(cudaGetLastError(), "kernel launch"))
        return;

    // Blocking copy: waits for the kernel to finish and also surfaces any
    // error raised while it was executing.
    vectorAddStepSucceeded(
        cudaMemcpy(ret, clientC, bytes, cudaMemcpyDeviceToHost),
        "device-to-host copy of the result");
}