SSE4.2-ready FFQ; BackPropagation adapted to fit the new FeedForward

2014-11-15 00:52:12 +01:00
parent 23452bcec5
commit dddf96da5b
5 changed files with 76 additions and 140 deletions
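The main change below rewrites FeedForwardNetworkQuick::solvePart around an SSE inner loop: for every neuron, the weights and the previous layer's outputs are multiplied eight floats at a time into two accumulators, reduced with _mm_hadd_ps, stored as the neuron's raw input, and only then pushed through the logistic activation. As a point of reference, here is a minimal stand-alone sketch of that pattern; it is not code from this repository: the names dot_sse and logistic are invented, the activation uses scalar std::exp instead of the exp_ps helper the commit calls, and both arrays are assumed 16-byte aligned as _mm_load_ps requires (build with -msse3 or later).

#include <immintrin.h> // SSE/SSE3 intrinsics: _mm_load_ps, _mm_mul_ps, _mm_hadd_ps, ...
#include <cmath>
#include <cstdio>

// Dot product of two 16-byte-aligned float arrays: eight elements per
// iteration with two independent accumulators, scalar loop for the tail.
static float dot_sse(const float* w, const float* x, size_t n)
{
    size_t aligned = n > 8 ? n - (n % 8) : 0;
    __m128 acc0 = _mm_setzero_ps();
    __m128 acc1 = _mm_setzero_ps();
    for (size_t k = 0; k < aligned; k += 8)
    {
        acc0 = _mm_add_ps(acc0, _mm_mul_ps(_mm_load_ps(w + k),     _mm_load_ps(x + k)));
        acc1 = _mm_add_ps(acc1, _mm_mul_ps(_mm_load_ps(w + k + 4), _mm_load_ps(x + k + 4)));
    }
    // Horizontal reduction: three hadd steps collapse both accumulators into lane 0.
    __m128 sum = _mm_hadd_ps(acc0, acc1);
    sum = _mm_hadd_ps(sum, sum);
    sum = _mm_hadd_ps(sum, sum);
    float result = _mm_cvtss_f32(sum);
    for (size_t k = aligned; k < n; ++k)   // leftover elements
        result += w[k] * x[k];
    return result;
}

// Logistic activation as used by the network: 1 / (1 + exp(-lambda * s)).
static float logistic(float s, float lambda)
{
    return 1.0f / (1.0f + std::exp(-lambda * s));
}

int main()
{
    alignas(16) float w[10] = {1,2,3,4,5,6,7,8,9,10};
    alignas(16) float x[10] = {1,1,1,1,1,1,1,1,1,1};
    float input  = dot_sse(w, x, 10);      // neuron "input" (weighted sum)
    float output = logistic(input, 0.5f);  // neuron "output" after activation
    std::printf("input=%f output=%f\n", input, output);
    return 0;
}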

View File

@@ -22,7 +22,7 @@ FFNeuron* FFLayer::operator[](int neuron)
     neurons=new FFNeuron*[layerSize];
     for(size_t i=0;i<layerSize;i++)
     {
-      neurons[i]=new FFNeuron(&potentials[i],weights[i],&sums[i],lambda);
+      neurons[i]=new FFNeuron(potentials[i],weights[i],sums[i],inputs[i],lambda);
     }
   }
   return neurons[neuron];
@@ -34,13 +34,13 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
 {
   for(size_t i=0;i<layers;i++)
   {
-    for (size_t j=0;j<layerSizes[i];j++)
+    for (size_t j=1;j<layerSizes[i];j++)
     {
-      if(j!=0)
       delete[] weights[i][j];
     }
     delete[] weights[i];
     delete[] potentials[i];
-    if(i!=layers-1)
     delete[] sums[i];
   }
   delete[] sums[layers];
@@ -59,23 +59,58 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
   }
 }
-#define _LOOP(FROM,TO) {}
-void FeedForwardNetworkQuick::solvePart(float *newSolution, size_t begin, size_t steps,size_t prevSize, float *sol,size_t layer)
+void FeedForwardNetworkQuick::solvePart(float *newSolution, register size_t begin, size_t end,size_t prevSize, float *sol,size_t layer)
 {
-  register size_t end=begin+steps;
+  if(prevSize >)
+  {
   for( size_t j=begin;j<end;j++)
   {
-    newSolution[j]=sol[0]*weights[layer][j][0];
-    for(register size_t k=1;k<prevSize;k++)
+    register size_t alignedPrev=prevSize>8?(prevSize-(prevSize%8)):0;
+    __m128 partialSolution = _mm_setzero_ps();
+    __m128 partialSolution2 = _mm_setzero_ps();
+    __m128 w;
+    __m128 sols;
+    __m128 w2;
+    __m128 sols2;
+    for(register size_t k=0;k<alignedPrev;k+=8)
     {
-      if(layer==0)
+      w = _mm_load_ps(this->weights[layer][j]+k);
+      w2 = _mm_load_ps(this->weights[layer][j]+k+4);
+      sols = _mm_load_ps(sol+k);
+      sols2 = _mm_load_ps(sol+k+4);
+      w=_mm_mul_ps(w,sols);
+      w2=_mm_mul_ps(w2,sols2);
+      partialSolution=_mm_add_ps(partialSolution,w);
+      partialSolution2=_mm_add_ps(partialSolution2,w2);
+    }
+    partialSolution = _mm_hadd_ps(partialSolution, partialSolution2);
+    partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
+    partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
+    _mm_store_ss(inputs[layer]+j,partialSolution);
+    for(register size_t k=alignedPrev;k<prevSize;k++)
     {
-        newSolution[j]+=sol[k]*weights[layer][j][k];
+      inputs[layer][j]+=sol[k]*weights[layer][j][k];
+    }
+    partialSolution=_mm_load_ss(inputs[layer]+j);
+    __m128 temporaryConst = _mm_set1_ps(-lambda);
+    partialSolution=_mm_mul_ps(temporaryConst,partialSolution); //-lambda*sol[k]
+    partialSolution=exp_ps(partialSolution); //exp(sols)
+    temporaryConst = _mm_set1_ps(1.0);
+    partialSolution= _mm_add_ps(partialSolution,temporaryConst); //1+exp()
+    partialSolution= _mm_div_ps(temporaryConst,partialSolution);//1/....*/
+    _mm_store_ss(newSolution+j,partialSolution);
+  }
   }else
   {
-        newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[layer][j][k];
+  for( size_t j=begin;j<end;j++)
+  {
+    register float tmp=0;
+    for(register size_t k=0;k<prevSize;k++)
+    {
+      tmp+=sol[k]*weights[layer][j][k];
     }
+    newSolution[j]=(1.0/(1.0+exp(-lambda*tmp)));
+    inputs[layer][j]=tmp;
   }
   }
 }
@@ -94,7 +129,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
   for(register size_t i=0;i<layers;i++)
   {
     float* newSolution= sums[i+1];//new bool[layerSizes[i]];
-    if(threads > 1 && layerSizes[i] > 600) // 600 is an guess about actual size, when creating thread has some speedup
+    if(threads > 1 && (layerSizes[i] > 700 ||prevSize > 700)) // 600 is an guess about actual size, when creating thread has some speedup
     {
       std::vector<std::thread> th;
       size_t s=1;
@@ -106,54 +141,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
         if(s>=layerSizes[i])
           break;
         th.push_back(std::thread([i,this,newSolution,prevSize,sol](size_t from, size_t to)->void{
-          _LOOP(from,to<(from+4)?to:(from+4));
-          register size_t max= (int)(to-4) < 0?0:(to-4);
-          for( size_t j=from+4;j<max;j+=4)
-          {
-            newSolution[j]=sol[0]*weights[i][j][0];
-            newSolution[j+1]=sol[0]*weights[i][j+1][0];
-            newSolution[j+2]=sol[0]*weights[i][j+2][0];
-            newSolution[j+3]=sol[0]*weights[i][j+3][0];
-            __m128 partialSolution = _mm_load_ps(newSolution+j);
-            register size_t upper_limit=prevSize-4;
-            for(register size_t k=from;k< 4 && k <prevSize;k++)
-            {
-              if(i==0)
-                newSolution[j]+=sol[k]*weights[i][j][k];
-              else
-                newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
-            }
-            for(register size_t k=4;k<upper_limit;k+=4)
-            {
-              __m128 w = _mm_loadr_ps((this->weights[i][j])+k);
-              __m128 sols = _mm_loadr_ps(sol+k);
-              if(i!=0)
-              {
-                __m128 tmp = _mm_set1_ps(-lambda);
-                sols=_mm_mul_ps(tmp,sols); //-lambda*sol[k]
-                sols=exp_ps(sols); //exp(sols)
-                tmp = _mm_set1_ps(1.0);
-                sols= _mm_add_ps(sols,tmp); //1+exp()
-                sols= _mm_div_ps(tmp,sols);//1/....
-              }
-              w=_mm_mul_ps(w,sols);
-              partialSolution=_mm_add_ps(partialSolution,w);
-            }
-            for(register size_t k=upper_limit;k<prevSize;k++)
-            {
-              if(i==0)
-                newSolution[j]+=sol[k]*weights[i][j][k];
-              else
-                newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
-            }
-          }
-          if(max!=0)
-            _LOOP(max,to);
+          solvePart(newSolution,from,to,prevSize,sol,i);
         },s,t==threads?layerSizes[i]:s+step));//{}
         s+=step;
       }
@@ -162,56 +150,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
         thr.join();
     }else
     {
-      if(1)
-      {
-        solvePart(newSolution,1,layerSizes[i]-1,prevSize,sol,i);
-      }else
-      {
-        _LOOP(1,layerSizes[i]<4?layerSizes[i]:4);
-        register size_t max= (int)(layerSizes[i]-4) < 0?0:(layerSizes[i]-4);
-        for( size_t j=4;j<max;j=j+4)
-        {
-          newSolution[j]=sol[0]*weights[i][j][0];
-          newSolution[j+1]=sol[0]*weights[i][j+1][0];
-          newSolution[j+2]=sol[0]*weights[i][j+2][0];
-          newSolution[j+3]=sol[0]*weights[i][j+3][0];
-          __m128 partialSolution = _mm_load_ps(newSolution+j);
-          register size_t upper_limit=prevSize-prevSize%4;
-          for(register size_t k=1;k<prevSize && k <4;k++)
-          {
-            if(i==0)
-              newSolution[j]+=sol[k]*weights[i][j][k];
-            else
-              newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
-          }
-          for(register size_t k=4;k<upper_limit;k+=4)
-          {
-            __m128 w = _mm_loadr_ps((this->weights[i][j])+k);
-            __m128 sols = _mm_loadr_ps(sol+k);
-            if(i!=0)
-            {
-              __m128 tmp = _mm_set1_ps(-lambda);
-              sols=_mm_mul_ps(tmp,sols); //-lambda*sol[k]
-              sols=exp_ps(sols); //exp(sols)
-              tmp = _mm_set1_ps(1.0);
-              sols= _mm_add_ps(sols,tmp); //1+exp()
-              sols= _mm_div_ps(tmp,sols);//1/....
-            }
-            w=_mm_mul_ps(w,sols);
-            partialSolution=_mm_add_ps(partialSolution,w);
-          }
-          for(register size_t k=upper_limit;k<prevSize;k++)
-          {
-            if(i==0)
-              newSolution[j]+=sol[k]*weights[i][j][k];
-            else
-              newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
-          }
-        }
-        if(max!=0)
-          _LOOP(max,layerSizes[i]);
-      }
+      solvePart(newSolution,1,layerSizes[i],prevSize,sol,i);
     }
     prevSize=layerSizes[i];
     sol=newSolution;
@@ -219,7 +158,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
   std::vector<double> ret;
   for(size_t i=1;i<prevSize;i++)
   {
-    ret.push_back((1.0/(1.0+exp(-lambda*sol[i]))));
+    ret.push_back(sol[i]);
   }
   return ret;
 }
@@ -231,7 +170,7 @@ FFLayer* FeedForwardNetworkQuick::operator[](int l)
     ffLayers=new FFLayer*[layers];
     for(size_t i=0;i<layers;i++)
     {
-      ffLayers[i]=new FFLayer(layerSizes[i],potentials[i],weights[i],sums[i+1],lambda);
+      ffLayers[i]=new FFLayer(layerSizes[i],potentials[i],weights[i],sums[i+1],inputs[i],lambda);
     }
   }
   return ffLayers[l];
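The threaded branch of solve() (hunk @@ -106,54 above) shrinks to a single call per worker: each std::thread now runs solvePart on its own slice of the layer instead of carrying a private copy of the SSE loop, and all slices are joined before the next layer starts. Below is a rough illustration of that range-splitting pattern only; the names parallel_for and process_range are invented and it has no tie to the actual class layout (build with -pthread).

#include <thread>
#include <vector>
#include <cstdio>

// Stand-in for the per-slice work (solvePart in the commit).
static void process_range(size_t from, size_t to)
{
    std::printf("processing [%zu, %zu)\n", from, to);
}

// Split the half-open range [begin, end) into roughly `workers` chunks,
// run each chunk in its own std::thread, then join them all.
static void parallel_for(size_t begin, size_t end, unsigned workers)
{
    std::vector<std::thread> pool;
    size_t step = (end - begin) / workers + 1;  // last chunk may be shorter
    for (size_t s = begin; s < end; s += step)
    {
        size_t to = (s + step < end) ? s + step : end;
        pool.emplace_back(process_range, s, to);
    }
    for (std::thread& t : pool)
        t.join();
}

int main()
{
    parallel_for(1, 5000, 4);  // e.g. neurons 1..4999 of a layer on 4 threads
    return 0;
}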

View File

@@ -31,19 +31,20 @@ namespace NeuronNetwork
       FFNeuron() = delete;
       FFNeuron(const FFNeuron&) = delete;
       FFNeuron& operator=(const FFNeuron&) = delete;
-      FFNeuron(float *pot, float *w, float*s,float lam):potential(pot),weights(w),sum(s),lambda(lam) { }
-      float getPotential() {return *potential;}
-      void setPotential(double p) { *potential=p;}
+      FFNeuron(float &pot, float *w, float &s, float &i,float lam):potential(pot),weights(w),sum(s),inputs(i),lambda(lam) { }
+      float getPotential() {return potential;}
+      void setPotential(double p) { potential=p;}
       float getWeight(unsigned int i ) { return weights[i];}
       void setWeight(unsigned int i,float p) { weights[i]=p; }
-      inline float output() const { return 1.0/(1.0+(exp(-lambda*input()))); }
-      inline float input() const { return *sum; }
+      inline float output() const { return sum; }
+      inline float input() const { return inputs; }
       inline float derivatedOutput() const { return lambda*output()*(1.0-output()); }
     protected:
-      float *potential;
+      float &potential;
       float *weights;
-      float *sum;
+      float &sum;
+      float &inputs;
       float lambda;
     private:
     };
@@ -53,7 +54,7 @@ namespace NeuronNetwork
     public:
       FFLayer(const FFLayer &) =delete;
      FFLayer operator=(const FFLayer &) = delete;
-      FFLayer(size_t s, float *p,float **w,float *su,float lam): neurons(nullptr),layerSize(s),potentials(p),weights(w),sums(su),lambda(lam) {}
+      FFLayer(size_t s, float *p,float **w,float *su,float *in,float lam): neurons(nullptr),layerSize(s),potentials(p),weights(w),sums(su),inputs(in),lambda(lam) {}
       ~FFLayer();
       FFNeuron* operator[](int neuron);
       size_t size() const {return layerSize;};
@@ -63,6 +64,7 @@ namespace NeuronNetwork
       float *potentials;
       float **weights;
       float *sums;
+      float *inputs;
       float lambda;
     };
@@ -77,7 +79,7 @@ namespace NeuronNetwork
         potentials= new float*[s.size()];
         layerSizes= new size_t[s.size()];
         sums= new float*[s.size()+1];
-        inputs= new float*[s.size()+1];
+        inputs= new float*[s.size()];
         int i=0;
         int prev_size=1;
         for(int layeSize:s) // TODO rename
@@ -93,7 +95,7 @@ namespace NeuronNetwork
           weights[i]= new float*[layeSize];
           potentials[i]= new float[layeSize];
           sums[i+1]= new float[layeSize];
-          inputs[i+1]= new float[layeSize];
+          inputs[i]= new float[layeSize];
           potentials[i][0]=1.0;
           sums[i+1][0]=1.0;
           for (int j=1;j<layeSize;j++)
@@ -115,14 +117,14 @@ namespace NeuronNetwork
       FFLayer* operator[](int l);
       void setThreads(unsigned t) {threads=t;}
     protected:
-      void solvePart(float *newSolution, size_t begin, size_t steps,size_t prevSize, float* sol,size_t layer);
+      void solvePart(float *newSolution, size_t begin, size_t end,size_t prevSize, float* sol,size_t layer);
     private:
       FFLayer **ffLayers;
       float ***weights;
       float **potentials;
-    public:
       float **sums;
+      float **inputs;
+    public:
     private:
       size_t *layerSizes;
       size_t layers;
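In the FFNeuron hunk above, the neuron now keeps the pre-activation weighted sum in inputs and the cached activation in sum, so output() is just a read and derivatedOutput() needs no second call to exp. That relies on the standard identity for the logistic activation; the derivation below is ordinary calculus, not something taken from the repository:

\[
f(x) = \frac{1}{1+e^{-\lambda x}}, \qquad
f'(x) = \frac{\lambda\, e^{-\lambda x}}{\left(1+e^{-\lambda x}\right)^{2}}
      = \lambda\, f(x)\,\bigl(1-f(x)\bigr)
\]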

View File

@@ -79,7 +79,7 @@ void Shin::NeuronNetwork::Learning::BackPropagation::propagate(const Shin::Neuro
     {
       network[i]->operator[](j)->setWeight(k,
         network[i]->operator[](j)->getWeight(k)+learningCoeficient* deltas[i][j]*
-        (i==0? network.sums[0][k]:(float)network[i-1]->operator[](k)->output()));
+        (i==0? network.sums[0][k]:network[i-1]->operator[](k)->output()));
     }
   }
 }
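The propagate() loop above applies the usual backpropagation weight update: each weight grows by the learning rate times the neuron's delta times the output of the neuron it connects from (or the raw network input for the first layer). Written out with eta standing for learningCoeficient (standard notation, not taken from the source):

\[
w_{jk} \leftarrow w_{jk} + \eta\, \delta_j\, o_k,
\qquad
o_k =
\begin{cases}
\text{network input } k & \text{for the first layer,} \\
\text{output of neuron } k \text{ in the previous layer} & \text{otherwise.}
\end{cases}
\]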

View File

@@ -31,23 +31,18 @@ int main(int argc)
   s.push_back(Shin::NeuronNetwork::Solution(std::vector<double>({0})));
   p.push_back(X(std::vector<bool>({1})));
-  Shin::NeuronNetwork::FeedForwardNetworkQuick q({1,5000,5000,5000,5000});
+  Shin::NeuronNetwork::FeedForwardNetworkQuick q({1,20000,20000,20000});
   Shin::NeuronNetwork::Learning::BackPropagation b(q);
   if(argc > 1)
   {
     std::cerr << "THREADING\n";
     q.setThreads(4);
   }
-  for(int i=0;i<5;i++)
+  for(int i=0;i<100;i++)
   {
     //b.teach(p[i%2],s[i%2]);
-    std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
-  }
-  for(int i=0;i<5;i++)
-  {
-    //b.teach(p[i%2],s[i%2]);
-    std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
+    q.solve(p[i%2])[0];
+    //std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
   }
   for(int i=0;i<2;i++)
   {

View File

@@ -22,7 +22,7 @@ int main()
   for (int test=0;test<2;test++)
   {
-    Shin::NeuronNetwork::FeedForwardNetworkQuick q({2,4,1});
+    Shin::NeuronNetwork::FeedForwardNetworkQuick q({2,40,1});
     Shin::NeuronNetwork::Learning::BackPropagation b(q);
     srand(time(NULL));