This commit is contained in:
2015-08-20 14:28:24 +02:00
parent f4c9487af3
commit 8c369a56d8
25 changed files with 12 additions and 872 deletions

View File

@@ -1,8 +0,0 @@
# Builds the CUDA vector-operations object and the `test` benchmark binary.
# NVCC defaults to the path that used to be hard-coded in the recipes and can
# be overridden from the command line, e.g. `make NVCC=nvcc`.
NVCC ?= /usr/local/cuda-6.5/bin/nvcc

# `all` does not name a file; keep a stray file called `all` from hiding it.
.PHONY: all

all: test

# Device code is compiled by nvcc into an object the host linker can consume.
VectorOperations.o: VectorOperations.cu VectorOperations.h
	$(NVCC) -c VectorOperations.cu -o VectorOperations.o

# The host driver is compiled with g++ and then linked through nvcc.
test: VectorOperations.o VectorOperations.h test.cpp
	g++ -c -std=c++14 -O0 ./test.cpp -o test.o
	$(NVCC) ./test.o VectorOperations.o -o test

View File

@@ -1,12 +0,0 @@
#include "./VectorOperations.h"
// CUDA kernel. Each thread takes care of one element of c
/*
 * Element-wise sum of two vectors: c[i] = a[i] + b[i] for every i in [0, n).
 * Each thread handles the single element selected by its flattened x index.
 */
__global__ void vecAdd(double *a, double *b, double *c, int n)
{
    const int tid = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads whose index falls past the end of the arrays have nothing to do.
    if (tid >= n)
        return;

    c[tid] = a[tid] + b[tid];
}

View File

@@ -1,47 +0,0 @@
#include "./VectorOperations.h"
// CUDA kernel. Each thread takes care of one element of c
/*
 * Every thread with a flattened x index id < n accumulates the sum of
 * a[i] * b[i] over i in [0, n) and stores that sum in ret[id].
 *
 * a, b : input arrays, read at indices 0 .. n-1
 * ret  : output array, written at indices 0 .. n-1
 * n    : number of elements
 *
 * NOTE(review): every thread computes the same n-term sum. Confirm whether a
 * per-element product (ret[id] = a[id] * b[id]) was the real intent.
 */
__global__ void __ADD(float *a, float *b, float *ret, int n)
{
    // Flattened global thread index.
    const int id = blockIdx.x * blockDim.x + threadIdx.x;

    // Make sure we do not go out of bounds.
    if (id >= n)
        return;

    float tmp = 0.0f;
    for (int i = 0; i < n; i++)
    {
        // Index with the loop variable: the former a[n] / b[n] ignored i and
        // read one element past the n valid entries.
        tmp += a[i] * b[i];
    }
    ret[id] = tmp;
}
/*
 * Allocates the three device buffers (clientA, clientB, clientC), each large
 * enough for `size` floats.
 *
 * The pointers start out null and are reset to null when an allocation fails,
 * so the destructor never hands an indeterminate pointer to cudaFree.
 */
Shin::Cuda::VectorOperations::VectorOperations(int size)
    : clientA(0), clientB(0), clientC(0)
{
    if (cudaMalloc(&clientA, sizeof(float)*size) != cudaSuccess)
        clientA = 0;
    if (cudaMalloc(&clientB, sizeof(float)*size) != cudaSuccess)
        clientB = 0;
    if (cudaMalloc(&clientC, sizeof(float)*size) != cudaSuccess)
        clientC = 0;
}
/*
 * Releases the three device buffers in the order clientA, clientB, clientC.
 */
Shin::Cuda::VectorOperations::~VectorOperations()
{
    float *const buffers[] = { clientA, clientB, clientC };
    for (int k = 0; k < 3; ++k)
        cudaFree(buffers[k]);
}
/*
 * Uploads the host arrays a and b, runs the __ADD kernel over n elements and
 * downloads the result into the host array ret.
 *
 * a, b : host input arrays holding at least n floats
 * ret  : host output array with room for n floats; left untouched when n <= 0
 *        or when a copy or the kernel launch reports an error
 * n    : number of elements to process
 *
 * NOTE: the device buffers were sized by the constructor argument and the
 * class does not record that size, so n is not checked against it here; the
 * caller must not pass an n larger than the constructor's size.
 */
void Shin::Cuda::VectorOperations::add(float *a, float *b, float *ret, int n)
{
    // A launch with zero blocks is an error, and there is nothing to compute.
    if (n <= 0)
        return;

    if (cudaMemcpy(clientA, a, n*sizeof(float), cudaMemcpyHostToDevice) != cudaSuccess)
        return;
    if (cudaMemcpy(clientB, b, n*sizeof(float), cudaMemcpyHostToDevice) != cudaSuccess)
        return;
    // ret is not uploaded: the kernel overwrites every one of the n outputs.

    // Integer ceil-division, written so that it cannot overflow for large n
    // and does not go through a float.
    const int blockSize = 1024;
    const int gridSize = n / blockSize + (n % blockSize != 0 ? 1 : 0);

    // The previous <<<1, n>>> asked for n threads in a single block, which
    // cannot launch once n exceeds the per-block thread limit.
    __ADD<<< gridSize, blockSize >>>(clientA, clientB, clientC, n);

    // Launch-configuration errors are only visible through cudaGetLastError;
    // skip the download so ret does not receive uninitialised device data.
    if (cudaGetLastError() != cudaSuccess)
        return;

    cudaMemcpy( ret, clientC, n*sizeof(float), cudaMemcpyDeviceToHost );
}

View File

@@ -1,20 +0,0 @@
#ifndef VECT_OP_
#define VECT_OP_
namespace Shin
{
namespace Cuda
{
    /**
     * Helper that holds three float buffers (clientA, clientB, clientC) and
     * exposes a vector operation working through them.
     *
     * The pointers are created and released by the constructor/destructor
     * pair, so an instance must not be copied: two objects would end up
     * releasing the same buffers. Copying is therefore disabled.
     */
    class VectorOperations
    {
        public:
            /** @param maxSize number of floats each internal buffer can hold */
            VectorOperations(int maxSize);
            ~VectorOperations();

            /**
             * Runs the operation on the first n elements of a and b and
             * stores the outcome in ret.
             *
             * @param a   first input array (at least n floats)
             * @param b   second input array (at least n floats)
             * @param ret output array (room for n floats)
             * @param n   number of elements to process
             */
            void add(float *a, float *b, float *ret, int n);
        protected:
            float *clientA;
            float *clientB;
            float *clientC;
        private:
            // Declared but intentionally never defined: copying would lead
            // to the same buffers being released twice.
            VectorOperations(const VectorOperations &);
            VectorOperations &operator=(const VectorOperations &);
    };
}
}
#endif

Binary file not shown.

View File

@@ -1,66 +0,0 @@
#include <stdio.h>
/*
 * Element-wise integer addition: c[i] = a[i] + b[i], one element per thread.
 * There is no length parameter and no bounds check, so the launch must not
 * supply more threads than the arrays have elements.
 */
__global__ void vector_add(int *a, int *b, int *c)
{
    /* flattened position of this thread across all blocks */
    const int i = threadIdx.x + blockDim.x * blockIdx.x;
    c[i] = a[i] + b[i];
}
/* experiment with N */
/* how large can it be? */
#define N (2048*2048)
#define THREADS_PER_BLOCK 512
/*
 * Adds two N-element integer vectors on the GPU and prints the first and the
 * last element of the result.
 *
 * Returns 0 on success and 1 when a host allocation, the kernel launch or the
 * copy of the result back to the host fails.
 */
int main()
{
    int *a, *b, *c;
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    int size = N * sizeof( int );

    /* allocate space for device copies of a, b, c */
    cudaMalloc( (void **) &d_a, size );
    cudaMalloc( (void **) &d_b, size );
    cudaMalloc( (void **) &d_c, size );

    /* allocate space for host copies of a, b, c and setup input values */
    a = (int *)malloc( size );
    b = (int *)malloc( size );
    c = (int *)malloc( size );
    if( a == NULL || b == NULL || c == NULL )
    {
        /* the initialisation loop below would dereference a null pointer */
        fprintf( stderr, "host allocation of %d bytes failed\n", size );
        free(a);
        free(b);
        free(c);
        cudaFree( d_a );
        cudaFree( d_b );
        cudaFree( d_c );
        return 1;
    }

    for( int i = 0; i < N; i++ )
    {
        a[i] = b[i] = i;
        c[i] = 0;
    }

    /* copy inputs to device */
    cudaMemcpy( d_a, a, size, cudaMemcpyHostToDevice );
    cudaMemcpy( d_b, b, size, cudaMemcpyHostToDevice );

    /* launch the kernel on the GPU: ceil(N / THREADS_PER_BLOCK) blocks */
    vector_add<<< (N + (THREADS_PER_BLOCK-1)) / THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( d_a, d_b, d_c );

    /* a failed launch is only reported through cudaGetLastError */
    cudaError_t err = cudaGetLastError();

    /* copy result back to host */
    if( err == cudaSuccess )
        err = cudaMemcpy( c, d_c, size, cudaMemcpyDeviceToHost );

    if( err == cudaSuccess )
    {
        /* the old format string had a single %d for two arguments and so
           printed the literal 0 instead of c[0] */
        printf( "c[%d] = %d\n", 0, c[0] );
        printf( "c[%d] = %d\n", N-1, c[N-1] );
    }
    else
    {
        fprintf( stderr, "CUDA error: %s\n", cudaGetErrorString( err ) );
    }

    /* clean up */
    free(a);
    free(b);
    free(c);
    cudaFree( d_a );
    cudaFree( d_b );
    cudaFree( d_c );

    return err == cudaSuccess ? 0 : 1;
} /* end main */

View File

@@ -1,66 +0,0 @@
#include <stdio.h>
/*
 * Element-wise integer addition: c[i] = a[i] + b[i], one element per thread.
 * There is no length parameter and no bounds check, so the launch must not
 * supply more threads than the arrays have elements.
 */
__global__ void vector_add(int *a, int *b, int *c)
{
    /* flattened position of this thread across all blocks */
    const int i = threadIdx.x + blockDim.x * blockIdx.x;
    c[i] = a[i] + b[i];
}
/* experiment with N */
/* how large can it be? */
#define N (2048*2048)
#define THREADS_PER_BLOCK 512
/*
 * Adds two N-element integer vectors on the GPU and prints the first and the
 * last element of the result.
 *
 * Returns 0 on success and 1 when a host allocation, the kernel launch or the
 * copy of the result back to the host fails.
 */
int main()
{
    int *a, *b, *c;
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    int size = N * sizeof( int );

    /* allocate space for device copies of a, b, c */
    cudaMalloc( (void **) &d_a, size );
    cudaMalloc( (void **) &d_b, size );
    cudaMalloc( (void **) &d_c, size );

    /* allocate space for host copies of a, b, c and setup input values */
    a = (int *)malloc( size );
    b = (int *)malloc( size );
    c = (int *)malloc( size );
    if( a == NULL || b == NULL || c == NULL )
    {
        /* the initialisation loop below would dereference a null pointer */
        fprintf( stderr, "host allocation of %d bytes failed\n", size );
        free(a);
        free(b);
        free(c);
        cudaFree( d_a );
        cudaFree( d_b );
        cudaFree( d_c );
        return 1;
    }

    for( int i = 0; i < N; i++ )
    {
        a[i] = b[i] = i;
        c[i] = 0;
    }

    /* copy inputs to device */
    cudaMemcpy( d_a, a, size, cudaMemcpyHostToDevice );
    cudaMemcpy( d_b, b, size, cudaMemcpyHostToDevice );

    /* launch the kernel on the GPU: ceil(N / THREADS_PER_BLOCK) blocks.
       The kernel defined in this file is vector_add; the former call named
       an undeclared `add` and could not compile. */
    vector_add<<< (N + (THREADS_PER_BLOCK-1)) / THREADS_PER_BLOCK, THREADS_PER_BLOCK >>>( d_a, d_b, d_c );

    /* a failed launch is only reported through cudaGetLastError */
    cudaError_t err = cudaGetLastError();

    /* copy result back to host */
    if( err == cudaSuccess )
        err = cudaMemcpy( c, d_c, size, cudaMemcpyDeviceToHost );

    if( err == cudaSuccess )
    {
        /* the old format string had a single %d for two arguments and so
           printed the literal 0 instead of c[0] */
        printf( "c[%d] = %d\n", 0, c[0] );
        printf( "c[%d] = %d\n", N-1, c[N-1] );
    }
    else
    {
        fprintf( stderr, "CUDA error: %s\n", cudaGetErrorString( err ) );
    }

    /* clean up */
    free(a);
    free(b);
    free(c);
    cudaFree( d_a );
    cudaFree( d_b );
    cudaFree( d_c );

    return err == cudaSuccess ? 0 : 1;
} /* end main */

Binary file not shown.

View File

@@ -1,44 +0,0 @@
#include "./VectorOperations.h"
#include <chrono>
#include <iostream>
/*
 * Reads the first element of `a` and stores it straight back; the array
 * contents are left unchanged.
 * NOTE(review): presumably exists so the compiler treats the array as used
 * before the timed section - confirm with the author.
 */
void _hack(float *a)
{
    *a = *a;
}
/*
 * Small benchmark: computes the sum of a[i]*b[i] on the CPU, then runs
 * Shin::Cuda::VectorOperations::add on the same inputs and sums its output,
 * printing the elapsed milliseconds and the result of each variant.
 */
int main()
{
    int size=50000000;
    float *a= new float[size];
    float *b= new float[size];
    float *c= new float[size];
    double sum = 0;
    Shin::Cuda::VectorOperations v(size);

    for(int i=0;i<size;i++)
    {
        a[i]=0.001f;
        b[i]=2;
        c[i]=0;
    }
    _hack(a);

    // Variant 1: plain host loop.
    auto t1 = std::chrono::high_resolution_clock::now();
    for(int i=0;i<size;i++)
    {
        sum+=a[i]*b[i];
    }
    auto t2 = std::chrono::high_resolution_clock::now();
    std::cout << "Time 1: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() << "result: " << sum <<std::endl;

    for(int i=0;i<size;i++)
        c[i]=0;

    // Variant 2: device call followed by a host-side sum of its output; the
    // timed section covers both.
    t1 = std::chrono::high_resolution_clock::now();
    v.add(a,b,c,size);
    sum=0;
    for(int i=0;i<size;i++)
        sum+=c[i];
    t2 = std::chrono::high_resolution_clock::now();
    std::cout << "Time 2: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() << "result: " << sum << std::endl;

    // The three host arrays were previously never released.
    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

View File

@@ -1,19 +0,0 @@
#ifndef _GENETICS_EXCEPT_H_
#define _GENETICS_EXCEPT_H_
#include <exception>
#include <string>
namespace Shin
{
namespace Genetics
{
    /**
     * Exception type of the Genetics library; carries a text message.
     *
     * Derives from std::exception so that generic `catch (const
     * std::exception &)` handlers also see it. Handlers that catch
     * Shin::Genetics::Exception directly keep working unchanged.
     */
    class Exception : public std::exception
    {
        public:
            /** @param s message later returned by what() */
            Exception(const std::string &s): str(s) {}

            /** @return the stored message; valid while this object lives */
            const char* what() const noexcept override { return str.c_str(); }
        protected:
            std::string str;
    };
}
}
#endif

View File

@@ -1,79 +0,0 @@
#ifndef _GENETICS_GENERATION_H_
#define _GENETICS_GENERATION_H_
#include <vector>
#include <float.h>
#include <cstddef>
#include "Individual.h"
namespace Shin
{
namespace Genetics
{
    /**
     * Ordered collection of individuals that forms one generation.
     *
     * _T must be copyable and provide getFitness(), whose result is compared
     * and accumulated as a double by the statistics below.
     */
    template <class _T>
    class Generation
    {
        public:
            /* constructors and so */
            Generation ():individual() {}
            Generation (const Generation &old):individual(old.individual) {}
            Generation& operator=(const Generation &g) {individual=g.individual; return *this;}

            /* generation manipulation */

            /** @return number of individuals stored */
            size_t size() const {return individual.size();}
            /** @return reference to the i-th individual; i is not range-checked */
            _T& operator[](const size_t i) { return individual[i];}
            /** Appends a copy of `a` to the generation. */
            void add(const _T &a) { individual.push_back(a);}
            inline auto begin() {return individual.begin();}
            inline auto end() {return individual.end();}

            /* different fitness counting */

            /** Placeholder: always returns 0.0. */
            double absoluteFitness()
            {
                return 0.0;
            }
            /** Placeholder: always returns 0.0. */
            double relativeFiness()
            {
                return 0.0;
            }
            /**
             * @return the largest fitness in the generation, or -DBL_MAX when
             *         the generation is empty
             */
            double bestFitness()
            {
                // DBL_MIN is the smallest *positive* double, so seeding the
                // maximum with it gave a wrong answer whenever every fitness
                // was zero or negative. -DBL_MAX is the lowest finite value.
                double f=-DBL_MAX;
                for(_T &a:individual)
                    if(f < a.getFitness())
                        f=a.getFitness();
                return f;
            }
            /**
             * @return the smallest fitness in the generation, or DBL_MAX when
             *         the generation is empty
             */
            double worstFitness()
            {
                double f=DBL_MAX;
                for(_T &a:individual)
                    if(f > a.getFitness())
                        f=a.getFitness();
                return f;
            }
            /**
             * @return the arithmetic mean of all fitness values, or 0.0 when
             *         the generation is empty
             */
            double averageFitness()
            {
                // Avoid the 0/0 division an empty generation would cause.
                if(individual.empty())
                    return 0.0;
                double f=0;
                for(_T &a:individual)
                    f+=a.getFitness();
                return f/individual.size();
            }
        protected:
            std::vector<_T> individual;
    };
}
}
#endif

View File

@@ -1,95 +0,0 @@
#ifndef _GENETICS_GENCREATOR_H_
#define _GENETICS_GENCREATOR_H_
#include "Generation.h"
#include "Individual.h"
namespace Shin
{
namespace Genetics
{
    /**
     * Abstract strategy that turns one generation into the next.
     * Subclasses implement generate(); maxGenerationSize (100 unless changed
     * through setMaxGenerationSize) is available to them as a size limit.
     */
    template <class _T>
    class GenerationCreator
    {
        public:
            /** Function-call shorthand that forwards to generate(). */
            Generation<_T> operator()(Generation<_T> &g)
            {
                return this->generate(g);
            }

            /** Builds and returns a new generation derived from `gen`. */
            virtual Generation<_T> generate(Generation<_T> &gen) = 0;

            virtual ~GenerationCreator() {}

            /** Stores the size limit read by subclasses. */
            void setMaxGenerationSize(const unsigned size)
            {
                maxGenerationSize = size;
            }

        protected:
            unsigned maxGenerationSize = 100;
    };

    /**
     * Generation creator whose generate() is defined after the namespace
     * block in this header.
     */
    template <class _T>
    class Roulete: public GenerationCreator<_T>
    {
        public:
            Generation<_T> generate(Generation<_T> &gen) override;
    };
}
}
/**
 * Builds the next generation from `gen`.
 *
 * 1. Individuals among the first maxGenerationSize entries whose fitness is
 *    at least (best + average) / 2 are copied over; with probability 1/20
 *    each of them also contributes a mutated combination with itself.
 * 2. The rest is filled, up to maxGenerationSize, with combinations of two
 *    parents picked by walking the population until a random share of the
 *    total fitness is used up; each child is mutated with probability 1/20.
 *
 * _T must provide fitness(), getFitness(), combine(const _T&) and mutate().
 *
 * @param gen current generation
 * @return the new generation; empty when `gen` is empty
 */
template <class _T>
Shin::Genetics::Generation< _T > Shin::Genetics::Roulete<_T>::generate(Shin::Genetics::Generation< _T >& gen)
{
    Generation<_T> newGen;

    // Nothing to select from; the index arithmetic below would also divide
    // by gen.size() == 0.
    if(gen.size()==0)
        return newGen;

    long fitness=0;
    for(_T &a:gen)
    {
        fitness+=a.fitness();
    }
    // rand()%(fitness+1) needs a positive modulus.
    if(fitness<=0)
        fitness=1;

    double avFitness=(gen.bestFitness()+gen.averageFitness())/2;
    for(unsigned int i=0;i<gen.size() && i < this->maxGenerationSize;i++)
    {
        if(gen[i].getFitness() >= avFitness)
        {
            newGen.add(gen[i]);
            if(rand()%20==0)
            {
                newGen.add(gen[i].combine(gen[i]));
                newGen[newGen.size()-1].mutate();
            }
        }
    }

    // Walks the population, subtracting each fitness from `remaining`, and
    // returns the index at which it is used up. `remaining` is a signed
    // double: the former unsigned counter wrapped around instead of going
    // negative, so the walk almost never stopped at the intended individual.
    auto walk=[&gen](double remaining) -> unsigned int
    {
        unsigned int index=0;
        while(remaining>0 && index<gen.size())
        {
            remaining-=gen[index].fitness();
            if(remaining>0)
                ++index;
        }
        // index can equal gen.size() after the walk; wrap it back in range.
        return index%gen.size();
    };

    while(newGen.size()< this->maxGenerationSize)
    {
        // Draw both random shares first to keep the rand() call order.
        const double x=rand()%(fitness+1);
        const double y=rand()%(fitness+1);
        const unsigned int xIndividual=walk(x);
        const unsigned int yIndividual=walk(y);
        newGen.add(gen[xIndividual].combine(gen[yIndividual]));
        if(rand()%20==0)
            newGen[newGen.size()-1].mutate();
    }
    return newGen;
}
#endif

View File

@@ -1 +0,0 @@
././Genetics.h

View File

@@ -1,98 +0,0 @@
#include "Genetics"
using namespace Shin::Genetics;
/*
Generation Roulete::generate(Generation& gen)
{
Generation s;
long fitness=0;
for (unsigned int i=0;i<gen.size();i++)
fitness+=abs(gen[i]->getFitness());
unsigned int size=150;
double avFitness=(gen.bestFitness()+gen.averageFitness())/2;
for(unsigned int i=0;i<gen.size() && i < size*2/3;i++)
{
if(gen[i]->getFitness() > avFitness)
{
s.add(gen[i]);
if(rand()%20==0)
{
s.add(gen[i]->combine(gen[i]));
s[s.size()-1]->mutate();
}
}
}
size-=s.size();
if(size>gen.size())
size=gen.size();//+(gen.size()+1)/2;
if(fitness==0)
fitness++;
std::vector <std::thread *> threads;
if((size > 1000))
{
long step=size/4;
for(int i=0;i<4;i++)
{
std::cout << "f: "<< (i*step) << ", t: "<< ((i+1)*step) << "\n";
std::thread *a= new std::thread(run,this,i*step,i==3?(size+1):((i+1)*step+1),fitness,&gen,&s);
// a->join();
// delete a;
threads.push_back(a);
}
}else
{
this->runner(0,size+1,fitness,&gen,&s);
// std::thread *a= new std::thread(run,this,0,size+1,fitness,&gen,&s);
// threads.push_back(a);
}
for(std::thread *a:threads)
{
a->join();
delete a;
}
return s;
}
void Roulete::runner(long unsigned int from, long unsigned int to,unsigned long fitness, Generation *gen, Generation *s)
{
for(int i=from;i<to;i++)
{
unsigned int x=rand()%(fitness+1);
unsigned int y=rand()%(fitness+1);
unsigned int xIndividual=0;
unsigned int yIndividual=0;
while( x > 0 && xIndividual < gen->size())
{
x-=gen->operator[](xIndividual)->getFitness();
if(x>0)
++xIndividual;
}
while(y>0 && yIndividual <gen->size())
{
y-=gen->operator[](yIndividual)->getFitness();
if(y>0)
yIndividual++;
}
xIndividual=xIndividual%(gen->size());
yIndividual=yIndividual%(gen->size());
// std::cout << fitness << " - " << xIndividual <<" - " << yIndividual << "\n";
s->add(gen->operator[](xIndividual)->combine(gen->operator[](yIndividual)));
if(rand()%20==0)
s->operator[](s->size()-1)->mutate();
}
}
*/

View File

@@ -1,78 +0,0 @@
#ifndef _GENETICS_H_
#define _GENETICS_H_
#include "Generation.h"
#include "Except.h"
#include "GenerationCreater.h"
#include <vector>
#include <iostream>
#include <thread>
#include <mutex>
namespace Shin
{
namespace Genetics
{
    /**
     * Driver of the genetic algorithm: keeps the current generation and
     * repeatedly replaces it with the output of a GenerationCreator.
     *
     * _T is the individual type, _C the creator type built by the default
     * constructor. Instances are not copyable because a default-constructed
     * object owns its creator and deletes it in the destructor.
     */
    template <class _T, typename _C=Roulete<_T>>
    class Genetics
    {
        public:
            /** Creates and owns a default-constructed _C. */
            Genetics():c(*new _C()),generation(),deleteCreator(1) {}

            /**
             * Uses a caller-provided creator, which is not deleted by this
             * object and must outlive it.
             * @param gc creator to use; must not be null
             */
            // `c` is a reference member, so it has to bind to *gc; the
            // previous `c(gc)` tried to bind it to the pointer itself.
            Genetics(GenerationCreator<_T> *gc):c(*gc),generation() {}

            // Copying would make two objects delete the same creator.
            Genetics(const Genetics &)=delete;
            Genetics& operator=(const Genetics &)=delete;

            virtual ~Genetics()
            {
                if(deleteCreator)
                    delete &c;
            }

            /** Adds a copy of `ind` to the current generation. */
            void addIndividual (const _T &ind) { generation.add(ind); }

            /**
             * Runs up to maxGenerations rounds, printing one status line per
             * round to std::cout, and returns the first individual whose
             * fitness reaches targetFitness. When no round reaches the
             * target, returns an individual of the last generation whose
             * fitness equals that generation's best fitness.
             *
             * The returned reference points into the internal generation.
             *
             * @throws Exception when no individual matches the best fitness
             */
            _T& getSolution(int maxGenerations,int targetFitness)
            {
                srand(time(NULL));
                for(int round=0;round<maxGenerations;round++)
                {
                    makeRound(round);
                    std::cout << "Round: " << round << " " << generation.size() << " " << generation.bestFitness() <<" - " << generation.worstFitness() << "\n";
                    for(_T& t:generation)
                    {
                        if(t.getFitness()>=targetFitness)
                            return t;
                    }
                }
                double maxFitness=generation.bestFitness();
                for(_T& t:generation)
                {
                    if(t.getFitness()==maxFitness)
                        return t;
                }
                throw Exception("Error finding individual with best Fitess");
            }

            /**
             * Replaces the current generation with the creator's output.
             * @param round round number; currently unused
             */
            void makeRound(const int round)
            {
                (void)round;
                generation=c(generation);
            }

            /** @return the creator in use */
            GenerationCreator<_T>& getCreator() {return c;}
        protected:
            GenerationCreator<_T> &c;
            Generation<_T> generation;
            // true only when `c` was allocated by the default constructor
            bool deleteCreator=0;
        private:
    };
}
}
#endif

View File

@@ -1,3 +0,0 @@
#include "Individual.h"

View File

@@ -1,20 +0,0 @@
#ifndef _GENETICS_INDIVIDUAL_H
#define _GENETICS_INDIVIDUAL_H
namespace Shin
{
namespace Genetics
{
    /**
     * Abstract base for a candidate solution: subclasses define how it
     * mutates and how its fitness is computed.
     */
    class Individual
    {
        public:
            virtual ~Individual() {}

            /** Mutation step; the behaviour is defined by the subclass. */
            virtual void mutate() = 0;

            /** @return the fitness value as computed by the subclass */
            virtual double getFitness() = 0;

            /** Non-overridable alias that forwards to getFitness(). */
            virtual double fitness() final
            {
                return this->getFitness();
            }
    };
}
}
#endif // INDIVIDUAL_H

View File

@@ -1,21 +0,0 @@
# Builds the Genetics library as a shared object and as a static archive.
# NOTE(review): CXX / CXXFLAGS and the object-file rules are presumably
# provided by ../../Makefile.const - confirm there.
OBJFILES=./Individual.o ./Genetics.o

LIBNAME=Genetics

include ../../Makefile.const

# None of these targets name a file they create.
.PHONY: all lib clean

all: lib

lib: $(LIBNAME).so $(LIBNAME).a

# Shared library, linked against pthread.
$(LIBNAME).so: $(OBJFILES) ./Genetics.h ./Generation.h ./GenerationCreater.h
	$(CXX) -shared $(CXXFLAGS) $(OBJFILES) -o $(LIBNAME).so -lpthread

# Static archive, plus a demangled symbol listing in $(LIBNAME).nm.
$(LIBNAME).a: $(OBJFILES) ./Genetics.h ./Generation.h ./GenerationCreater.h
	rm -f $(LIBNAME).a # create new library
	ar rcv $(LIBNAME).a $(OBJFILES)
	ranlib $(LIBNAME).a
	nm --demangle $(LIBNAME).a > $(LIBNAME).nm

clean:
	@rm -f ./*.o ./*.so ./*.a ./*.nm

View File

@@ -29,7 +29,6 @@ FFNeuron& FFLayer::operator[](const size_t& neuron)
throw std::out_of_range("Not so many neurons in layers.");
return *neurons[neuron];
}
@@ -86,12 +85,14 @@ FeedForward::~FeedForward()
delete[] potentials[i];
delete[] outputs[i];
delete[] inputs[i];
delete transfer[i];
}
delete[] weights;
delete[] potentials;
delete[] layerSizes;
delete[] outputs;
delete[] inputs;
delete[] transfer;
}
if(ffLayers !=nullptr)
{

View File

@@ -115,7 +115,7 @@ namespace NeuralNetwork
*/
FeedForward(const FeedForward &f) = delete; //TODO
/**
* @brief we don't want to allow network to be copied
* @brief we don't want to allow network to be assigned
*/
FeedForward operator=(const FeedForward &f)=delete;