don't know

2014-12-10 16:01:53 +01:00
parent 993b4d3f04
commit aab9a073e9
35 changed files with 725 additions and 100 deletions
--- a/src/Cuda/VectorOperations.cu
+++ b/src/Cuda/VectorOperations.cu
@@ -0,0 +1,47 @@
+#include "./VectorOperations.h"
+
+#include <stdexcept>
+
+/**
+ * CUDA kernel. Every thread whose global index is below n accumulates the
+ * products a[i] * b[i] for i in [0, n) and stores that sum in ret[id].
+ *
+ * Launch layout: 1D grid of 1D blocks. Any configuration that supplies at
+ * least n threads in total fills ret[0..n-1]; surplus threads exit early.
+ * No shared memory is used.
+ *
+ * NOTE(review): despite the name nothing is added element-wise - every thread
+ * writes the same sum of products. The previous loop read a[n] and b[n]
+ * (one element past the end) on every iteration; the loop counter is assumed
+ * to be the intended index - confirm against the callers' expectations.
+ * NOTE(review): identifiers containing a double underscore are reserved in
+ * C++; the name is kept only because the launch site refers to it.
+ *
+ * @param a   device pointer to n floats (read only)
+ * @param b   device pointer to n floats (read only)
+ * @param ret device pointer to n floats (written)
+ * @param n   number of elements in each array
+ */
+__global__ void __ADD(const float *a, const float *b, float *ret, int n)
+{
+	// Get our global thread ID
+	int id = blockIdx.x * blockDim.x + threadIdx.x;
+
+	// Make sure we do not go out of bounds
+	if (id >= n)
+		return;
+
+	// Float literal: a bare 0.0 would be a double constant.
+	float tmp = 0.0f;
+	for (int i = 0; i < n; i++)
+	{
+		tmp += a[i] * b[i];
+	}
+	ret[id] = tmp;
+}
+
+/**
+ * Allocates the three device buffers clientA, clientB and clientC, each
+ * large enough for `size` floats.
+ *
+ * If any allocation fails, the buffers obtained so far are released, all
+ * three pointers are reset to null and the CUDA error is reported as an
+ * exception instead of being ignored.
+ *
+ * @param size number of floats each buffer must hold
+ * @throws std::runtime_error carrying the CUDA error string when cudaMalloc
+ *         fails
+ */
+Shin::Cuda::VectorOperations::VectorOperations(int size)
+{
+	// Start from null so the cleanup path below never frees a garbage pointer.
+	clientA = 0;
+	clientB = 0;
+	clientC = 0;
+
+	const size_t bytes = sizeof(float) * size;
+
+	// Stop at the first failing allocation and remember its error code.
+	cudaError_t status = cudaMalloc(&clientA, bytes);
+	if (status == cudaSuccess)
+		status = cudaMalloc(&clientB, bytes);
+	if (status == cudaSuccess)
+		status = cudaMalloc(&clientC, bytes);
+
+	if (status != cudaSuccess)
+	{
+		// The destructor does not run when a constructor throws, so release
+		// the partial allocation here. Freeing a null pointer is a no-op.
+		cudaFree(clientA);
+		cudaFree(clientB);
+		cudaFree(clientC);
+		clientA = 0;
+		clientB = 0;
+		clientC = 0;
+		throw std::runtime_error(cudaGetErrorString(status));
+	}
+}
+
+/**
+ * Releases the three device buffers held by this object.
+ */
+Shin::Cuda::VectorOperations::~VectorOperations()
+{
+	// Freed in the order they were allocated: A, B, then C.
+	void *buffers[] = { clientA, clientB, clientC };
+	for (int k = 0; k < 3; ++k)
+	{
+		cudaFree(buffers[k]);
+	}
+}
+
+/**
+ * Copies the host arrays a and b into the device buffers, runs the __ADD
+ * kernel over n elements and copies the result back into ret.
+ *
+ * All transfers are blocking, so ret is complete when the call returns.
+ *
+ * @param a   host pointer to n input floats
+ * @param b   host pointer to n input floats
+ * @param ret host pointer receiving n output floats
+ * @param n   number of elements; values <= 0 make the call a no-op.
+ *            NOTE(review): n must not exceed the size given to the
+ *            constructor - that size is not stored in this file, so it
+ *            cannot be checked here.
+ * @throws std::runtime_error carrying the CUDA error string when a copy or
+ *         the kernel launch fails
+ */
+void Shin::Cuda::VectorOperations::add(float *a, float *b, float *ret, int n)
+{
+	// Nothing to compute, and a launch with zero threads would be invalid.
+	if (n <= 0)
+		return;
+
+	const size_t bytes = n * sizeof(float);
+
+	// Only the inputs are uploaded: the kernel overwrites every element of
+	// the result buffer, so sending ret to the device first is not needed.
+	cudaError_t status = cudaMemcpy(clientA, a, bytes, cudaMemcpyHostToDevice);
+	if (status == cudaSuccess)
+		status = cudaMemcpy(clientB, b, bytes, cudaMemcpyHostToDevice);
+	if (status != cudaSuccess)
+		throw std::runtime_error(cudaGetErrorString(status));
+
+	// Spread the n threads over several blocks; putting all of them into a
+	// single block fails once n exceeds the per-block thread limit.
+	const int blockSize = 1024;
+	const int gridSize = (n - 1) / blockSize + 1; // ceil(n / blockSize), n > 0
+	__ADD<<<gridSize, blockSize>>>(clientA, clientB, clientC, n);
+
+	// A launch does not return an error itself; query it explicitly.
+	status = cudaGetLastError();
+	if (status != cudaSuccess)
+		throw std::runtime_error(cudaGetErrorString(status));
+
+	// The blocking copy waits for the kernel and also reports errors raised
+	// while it was running.
+	status = cudaMemcpy(ret, clientC, bytes, cudaMemcpyDeviceToHost);
+	if (status != cudaSuccess)
+		throw std::runtime_error(cudaGetErrorString(status));
+}