SSE4.2-ready FFQ; Backpropagation changed to fit the new FeedForward
@@ -22,7 +22,7 @@ FFNeuron* FFLayer::operator[](int neuron)
neurons=new FFNeuron*[layerSize];
for(size_t i=0;i<layerSize;i++)
{
neurons[i]=new FFNeuron(&potentials[i],weights[i],&sums[i],lambda);
neurons[i]=new FFNeuron(potentials[i],weights[i],sums[i],inputs[i],lambda);
}
}
return neurons[neuron];
@@ -34,14 +34,14 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
{
for(size_t i=0;i<layers;i++)
{
for (size_t j=0;j<layerSizes[i];j++)
for (size_t j=1;j<layerSizes[i];j++)
{
if(j!=0)
delete[] weights[i][j];
}
delete[] weights[i];
delete[] potentials[i];
delete[] sums[i];
if(i!=layers-1)
delete[] sums[i];
}
delete[] sums[layers];
delete[] weights;
@@ -59,23 +59,58 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
}
}

#define _LOOP(FROM,TO) {}

void FeedForwardNetworkQuick::solvePart(float *newSolution, size_t begin, size_t steps,size_t prevSize, float *sol,size_t layer)
void FeedForwardNetworkQuick::solvePart(float *newSolution, register size_t begin, size_t end,size_t prevSize, float *sol,size_t layer)
{
register size_t end=begin+steps;
for( size_t j=begin;j<end;j++)
if(prevSize >)
{
newSolution[j]=sol[0]*weights[layer][j][0];
for(register size_t k=1;k<prevSize;k++)
for( size_t j=begin;j<end;j++)
{
if(layer==0)
register size_t alignedPrev=prevSize>8?(prevSize-(prevSize%8)):0;
__m128 partialSolution = _mm_setzero_ps();
__m128 partialSolution2 = _mm_setzero_ps();
__m128 w;
__m128 sols;
__m128 w2;
__m128 sols2;
for(register size_t k=0;k<alignedPrev;k+=8)
{
newSolution[j]+=sol[k]*weights[layer][j][k];
}else
{
newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[layer][j][k];
w = _mm_load_ps(this->weights[layer][j]+k);
w2 = _mm_load_ps(this->weights[layer][j]+k+4);
sols = _mm_load_ps(sol+k);
sols2 = _mm_load_ps(sol+k+4);
w=_mm_mul_ps(w,sols);
w2=_mm_mul_ps(w2,sols2);
partialSolution=_mm_add_ps(partialSolution,w);
partialSolution2=_mm_add_ps(partialSolution2,w2);
}
partialSolution = _mm_hadd_ps(partialSolution, partialSolution2);
partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
_mm_store_ss(inputs[layer]+j,partialSolution);
for(register size_t k=alignedPrev;k<prevSize;k++)
{
inputs[layer][j]+=sol[k]*weights[layer][j][k];
}
partialSolution=_mm_load_ss(inputs[layer]+j);
__m128 temporaryConst = _mm_set1_ps(-lambda);
partialSolution=_mm_mul_ps(temporaryConst,partialSolution); //-lambda*sol[k]
partialSolution=exp_ps(partialSolution); //exp(sols)
temporaryConst = _mm_set1_ps(1.0);
partialSolution= _mm_add_ps(partialSolution,temporaryConst); //1+exp()
partialSolution= _mm_div_ps(temporaryConst,partialSolution);//1/....*/
_mm_store_ss(newSolution+j,partialSolution);
}
}else
{
for( size_t j=begin;j<end;j++)
{
register float tmp=0;
for(register size_t k=0;k<prevSize;k++)
{
tmp+=sol[k]*weights[layer][j][k];
}
newSolution[j]=(1.0/(1.0+exp(-lambda*tmp)));
inputs[layer][j]=tmp;
}
}
}
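For readers following the new solvePart: the hot path is now an eight-wide unrolled SSE dot product whose partial sums are collapsed with _mm_hadd_ps, after which the raw weighted sum is stored in inputs[layer][j] and the logistic activation is written to newSolution[j]. Below is a minimal standalone sketch of that pattern, not the repository's code; the function name weightedSigmoid is invented, unaligned loads are used for safety, and exp_ps from sse_mathfun.h is replaced by scalar std::exp to keep the sketch self-contained.

// Sketch only (not the repository's code): the dot-product + activation pattern
// that the new solvePart implements with SSE. Requires SSE3 for _mm_hadd_ps.
#include <pmmintrin.h>
#include <cmath>
#include <cstddef>

float weightedSigmoid(const float *w, const float *x, std::size_t n, float lambda)
{
    __m128 acc = _mm_setzero_ps();
    std::size_t aligned = n - (n % 4);             // length handled four floats at a time
    for (std::size_t k = 0; k < aligned; k += 4)
    {
        __m128 wv = _mm_loadu_ps(w + k);           // unaligned loads for safety
        __m128 xv = _mm_loadu_ps(x + k);
        acc = _mm_add_ps(acc, _mm_mul_ps(wv, xv)); // accumulate partial products
    }
    acc = _mm_hadd_ps(acc, acc);                   // two horizontal adds collapse
    acc = _mm_hadd_ps(acc, acc);                   // the four lanes into lane 0
    float sum = _mm_cvtss_f32(acc);
    for (std::size_t k = aligned; k < n; ++k)      // scalar tail for the remainder
        sum += w[k] * x[k];
    return 1.0f / (1.0f + std::exp(-lambda * sum)); // logistic activation
}

Keeping the raw sum separately is what lets FFNeuron::input() and output() return cached values later in this commit instead of recomputing the sigmoid.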
@@ -94,7 +129,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
for(register size_t i=0;i<layers;i++)
{
float* newSolution= sums[i+1];//new bool[layerSizes[i]];
if(threads > 1 && layerSizes[i] > 600) // rough guess at the layer size where creating threads starts to pay off
if(threads > 1 && (layerSizes[i] > 700 || prevSize > 700)) // rough guess at the layer size where creating threads starts to pay off
{
std::vector<std::thread> th;
size_t s=1;
@@ -106,54 +141,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
if(s>=layerSizes[i])
break;
th.push_back(std::thread([i,this,newSolution,prevSize,sol](size_t from, size_t to)->void{
_LOOP(from,to<(from+4)?to:(from+4));

register size_t max= (int)(to-4) < 0?0:(to-4);

for( size_t j=from+4;j<max;j+=4)
{
newSolution[j]=sol[0]*weights[i][j][0];
newSolution[j+1]=sol[0]*weights[i][j+1][0];
newSolution[j+2]=sol[0]*weights[i][j+2][0];
newSolution[j+3]=sol[0]*weights[i][j+3][0];

__m128 partialSolution = _mm_load_ps(newSolution+j);
register size_t upper_limit=prevSize-4;

for(register size_t k=from;k< 4 && k <prevSize;k++)
{
if(i==0)
newSolution[j]+=sol[k]*weights[i][j][k];
else
newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
}

for(register size_t k=4;k<upper_limit;k+=4)
{
__m128 w = _mm_loadr_ps((this->weights[i][j])+k);
__m128 sols = _mm_loadr_ps(sol+k);
if(i!=0)
{
__m128 tmp = _mm_set1_ps(-lambda);
sols=_mm_mul_ps(tmp,sols); //-lambda*sol[k]
sols=exp_ps(sols); //exp(sols)
tmp = _mm_set1_ps(1.0);
sols= _mm_add_ps(sols,tmp); //1+exp()
sols= _mm_div_ps(tmp,sols);//1/....
}
w=_mm_mul_ps(w,sols);
partialSolution=_mm_add_ps(partialSolution,w);
}
for(register size_t k=upper_limit;k<prevSize;k++)
{
if(i==0)
newSolution[j]+=sol[k]*weights[i][j][k];
else
newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
}
}
if(max!=0)
_LOOP(max,to);
solvePart(newSolution,from,to,prevSize,sol,i);
},s,t==threads?layerSizes[i]:s+step));//{}
s+=step;
}
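The threaded branch of solve() now hands each slice of a layer to solvePart instead of repeating the SSE loop inside the lambda. A rough sketch of that partitioning is below, with invented names (runLayerInParallel, workFn); it only mirrors what the diff shows: slices start at index 1 (the bias neuron), each worker gets roughly layerSize/threads neurons, and the last worker takes the remainder up to layerSizes[i].

// Illustrative sketch, not the project's API: split [1, layerSize) across threads
// and join. Assumes threads >= 1; WorkFn(begin, end) plays the role of solvePart.
#include <thread>
#include <vector>
#include <cstddef>

template <typename WorkFn>
void runLayerInParallel(std::size_t layerSize, unsigned threads, WorkFn workFn)
{
    std::vector<std::thread> pool;
    std::size_t step = layerSize / threads;
    std::size_t begin = 1;                          // neuron 0 is the bias unit
    for (unsigned t = 1; t <= threads && begin < layerSize; ++t)
    {
        std::size_t end = (t == threads) ? layerSize : begin + step;
        pool.emplace_back([=] { workFn(begin, end); }); // each worker fills its slice
        begin = end;
    }
    for (auto &worker : pool)
        worker.join();
}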
@@ -162,56 +150,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
thr.join();
}else
{
if(1)
{
solvePart(newSolution,1,layerSizes[i]-1,prevSize,sol,i);
}else
{
_LOOP(1,layerSizes[i]<4?layerSizes[i]:4);
register size_t max= (int)(layerSizes[i]-4) < 0?0:(layerSizes[i]-4);
for( size_t j=4;j<max;j=j+4)
{
newSolution[j]=sol[0]*weights[i][j][0];
newSolution[j+1]=sol[0]*weights[i][j+1][0];
newSolution[j+2]=sol[0]*weights[i][j+2][0];
newSolution[j+3]=sol[0]*weights[i][j+3][0];

__m128 partialSolution = _mm_load_ps(newSolution+j);
register size_t upper_limit=prevSize-prevSize%4;
for(register size_t k=1;k<prevSize && k <4;k++)
{
if(i==0)
newSolution[j]+=sol[k]*weights[i][j][k];
else
newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
}
for(register size_t k=4;k<upper_limit;k+=4)
{
__m128 w = _mm_loadr_ps((this->weights[i][j])+k);
__m128 sols = _mm_loadr_ps(sol+k);
if(i!=0)
{
__m128 tmp = _mm_set1_ps(-lambda);
sols=_mm_mul_ps(tmp,sols); //-lambda*sol[k]
sols=exp_ps(sols); //exp(sols)
tmp = _mm_set1_ps(1.0);
sols= _mm_add_ps(sols,tmp); //1+exp()
sols= _mm_div_ps(tmp,sols);//1/....
}
w=_mm_mul_ps(w,sols);
partialSolution=_mm_add_ps(partialSolution,w);
}
for(register size_t k=upper_limit;k<prevSize;k++)
{
if(i==0)
newSolution[j]+=sol[k]*weights[i][j][k];
else
newSolution[j]+=(1.0/(1.0+exp(-lambda*sol[k])))*weights[i][j][k];
}
}
if(max!=0)
_LOOP(max,layerSizes[i]);
}
solvePart(newSolution,1,layerSizes[i],prevSize,sol,i);
}
prevSize=layerSizes[i];
sol=newSolution;
@@ -219,7 +158,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
std::vector<double> ret;
for(size_t i=1;i<prevSize;i++)
{
ret.push_back((1.0/(1.0+exp(-lambda*sol[i]))));
ret.push_back(sol[i]);
}
return ret;
}
@@ -231,7 +170,7 @@ FFLayer* FeedForwardNetworkQuick::operator[](int l)
ffLayers=new FFLayer*[layers];
for(size_t i=0;i<layers;i++)
{
ffLayers[i]=new FFLayer(layerSizes[i],potentials[i],weights[i],sums[i+1],lambda);
ffLayers[i]=new FFLayer(layerSizes[i],potentials[i],weights[i],sums[i+1],inputs[i],lambda);
}
}
return ffLayers[l];
@@ -31,19 +31,20 @@ namespace NeuronNetwork
FFNeuron() = delete;
FFNeuron(const FFNeuron&) = delete;
FFNeuron& operator=(const FFNeuron&) = delete;
FFNeuron(float *pot, float *w, float*s,float lam):potential(pot),weights(w),sum(s),lambda(lam) { }
FFNeuron(float &pot, float *w, float &s, float &i,float lam):potential(pot),weights(w),sum(s),inputs(i),lambda(lam) { }

float getPotential() {return *potential;}
void setPotential(double p) { *potential=p;}
float getPotential() {return potential;}
void setPotential(double p) { potential=p;}
float getWeight(unsigned int i ) { return weights[i];}
void setWeight(unsigned int i,float p) { weights[i]=p; }
inline float output() const { return 1.0/(1.0+(exp(-lambda*input()))); }
inline float input() const { return *sum; }
inline float output() const { return sum; }
inline float input() const { return inputs; }
inline float derivatedOutput() const { return lambda*output()*(1.0-output()); }
protected:
float *potential;
float &potential;
float *weights;
float *sum;
float &sum;
float &inputs;
float lambda;
private:
};
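A short note on derivatedOutput(), which appears as unchanged context in the hunk above: it relies on the usual identity for the logistic activation, written here in LaTeX for reference (with \lambda the steepness parameter lambda):

\sigma(x) = \frac{1}{1 + e^{-\lambda x}}, \qquad \sigma'(x) = \lambda\,\sigma(x)\,\bigl(1 - \sigma(x)\bigr)

With output() now returning the cached activation and input() the cached weighted sum, the derivative comes directly from the stored value rather than from a recomputed exponential.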
@@ -53,7 +54,7 @@ namespace NeuronNetwork
public:
FFLayer(const FFLayer &) =delete;
FFLayer operator=(const FFLayer &) = delete;
FFLayer(size_t s, float *p,float **w,float *su,float lam): neurons(nullptr),layerSize(s),potentials(p),weights(w),sums(su),lambda(lam) {}
FFLayer(size_t s, float *p,float **w,float *su,float *in,float lam): neurons(nullptr),layerSize(s),potentials(p),weights(w),sums(su),inputs(in),lambda(lam) {}
~FFLayer();
FFNeuron* operator[](int neuron);
size_t size() const {return layerSize;};
@@ -63,6 +64,7 @@ namespace NeuronNetwork
float *potentials;
float **weights;
float *sums;
float *inputs;
float lambda;
};
@@ -77,7 +79,7 @@ namespace NeuronNetwork
potentials= new float*[s.size()];
layerSizes= new size_t[s.size()];
sums= new float*[s.size()+1];
inputs= new float*[s.size()+1];
inputs= new float*[s.size()];
int i=0;
int prev_size=1;
for(int layeSize:s) // TODO rename
@@ -93,7 +95,7 @@ namespace NeuronNetwork
weights[i]= new float*[layeSize];
potentials[i]= new float[layeSize];
sums[i+1]= new float[layeSize];
inputs[i+1]= new float[layeSize];
inputs[i]= new float[layeSize];
potentials[i][0]=1.0;
sums[i+1][0]=1.0;
for (int j=1;j<layeSize;j++)
@@ -115,14 +117,14 @@ namespace NeuronNetwork
FFLayer* operator[](int l);
void setThreads(unsigned t) {threads=t;}
protected:
void solvePart(float *newSolution, size_t begin, size_t steps,size_t prevSize, float* sol,size_t layer);
void solvePart(float *newSolution, size_t begin, size_t end,size_t prevSize, float* sol,size_t layer);
private:
FFLayer **ffLayers;
float ***weights;
float **potentials;
public:
float **sums;
float **inputs;
public:
private:
size_t *layerSizes;
size_t layers;
@@ -79,7 +79,7 @@ void Shin::NeuronNetwork::Learning::BackPropagation::propagate(const Shin::Neuro
{
network[i]->operator[](j)->setWeight(k,
network[i]->operator[](j)->getWeight(k)+learningCoeficient* deltas[i][j]*
(i==0? network.sums[0][k]:(float)network[i-1]->operator[](k)->output()));
(i==0? network.sums[0][k]:network[i-1]->operator[](k)->output()));
}
}
}
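The update applied in this loop is the standard backpropagation weight step, restated here for reference (not new code): with \eta the learningCoeficient, \delta_j the stored deltas[i][j], and o_k either the raw input sums[0][k] for the first layer or the previous layer's output() otherwise,

w_{jk} \leftarrow w_{jk} + \eta\,\delta_j\,o_k

The two adjacent setWeight lines in the hunk differ only in the explicit (float) cast on output().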
@@ -31,23 +31,18 @@ int main(int argc)
s.push_back(Shin::NeuronNetwork::Solution(std::vector<double>({0})));
p.push_back(X(std::vector<bool>({1})));

Shin::NeuronNetwork::FeedForwardNetworkQuick q({1,5000,5000,5000,5000});
Shin::NeuronNetwork::FeedForwardNetworkQuick q({1,20000,20000,20000});
Shin::NeuronNetwork::Learning::BackPropagation b(q);
if(argc > 1)
{
std::cerr << "THREADING\n";
q.setThreads(4);
}
for(int i=0;i<5;i++)
for(int i=0;i<100;i++)
{
//b.teach(p[i%2],s[i%2]);
std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
}

for(int i=0;i<5;i++)
{
//b.teach(p[i%2],s[i%2]);
std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
q.solve(p[i%2])[0];
//std::cerr << i%2 <<". FOR: [" << p[i%2].representation()[0] << "] res: " << q.solve(p[i%2])[0] << " should be " << s[i%2][0]<<"\n";
}
for(int i=0;i<2;i++)
{
@@ -22,7 +22,7 @@ int main()

for (int test=0;test<2;test++)
{
Shin::NeuronNetwork::FeedForwardNetworkQuick q({2,4,1});
Shin::NeuronNetwork::FeedForwardNetworkQuick q({2,40,1});
Shin::NeuronNetwork::Learning::BackPropagation b(q);

srand(time(NULL));