A lot of fixes and SSE enhancements

This commit is contained in:
2014-11-18 11:09:34 +01:00
parent 207e141cca
commit 0abc0d07dd
28 changed files with 246 additions and 280 deletions

View File

@@ -40,14 +40,15 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
}
delete[] weights[i];
delete[] potentials[i];
if(i!=layers-1)
delete[] sums[i];
delete[] sums[i];
delete[] inputs[i];
}
delete[] sums[layers];
delete[] weights;
delete[] potentials;
delete[] layerSizes;
delete[] sums;
delete[] inputs;
}
if(ffLayers !=nullptr)
{
@@ -61,39 +62,40 @@ FeedForwardNetworkQuick::~FeedForwardNetworkQuick()
void FeedForwardNetworkQuick::solvePart(float *newSolution, register size_t begin, size_t end,size_t prevSize, float *sol,size_t layer)
{
if(prevSize >8)
if(prevSize >4)
{
__m128 partialSolution;
__m128 partialSolution2;
__m128 w;
__m128 sols;
__m128 w2;
__m128 sols2;
__m128 temporaryConst1=_mm_set1_ps(1.0);
__m128 temporaryConstLambda=_mm_set1_ps(-lambda);
register size_t alignedPrev=prevSize>8?(prevSize-(prevSize%8)):0;
float tmp;
register size_t alignedPrev=prevSize>16?(prevSize-(prevSize%16)):0;
for( size_t j=begin;j<end;j++)
{
tmp=0;
partialSolution= _mm_setzero_ps();
w=_mm_setzero_ps();
for(register size_t k=alignedPrev;k<prevSize;k++)
{
tmp+=sol[k]*weights[layer][j][k];
w = _mm_load_ss(this->weights[layer][j]+k);
sols = _mm_load_ss(sol+k);
w=_mm_mul_ps(w,sols);
partialSolution=_mm_add_ps(partialSolution,w);
// w=_mm_shuffle_ps(w,w,3*2^0+0*2^2+1*2^4+2*2^6);
// sols=_mm_shuffle_ps(sols,sols,3*2^0+0*2^2+1*2^4+2*2^6);
}
partialSolution = _mm_setzero_ps();
partialSolution2 = _mm_set_ss(tmp);
for(register size_t k=0;k<alignedPrev;k+=8)
for(register size_t k=0;k<alignedPrev;k+=4)
{
w = _mm_load_ps(this->weights[layer][j]+k);
w2 = _mm_load_ps(this->weights[layer][j]+k+4);
//_mm_prefetch((char*)this->weights[layer][j]+k+4,_MM_HINT_T0);
sols = _mm_load_ps(sol+k);
sols2 = _mm_load_ps(sol+k+4);
w=_mm_mul_ps(w,sols);
w2=_mm_mul_ps(w2,sols2);
partialSolution=_mm_add_ps(partialSolution,w);
partialSolution2=_mm_add_ps(partialSolution2,w2);
}
partialSolution = _mm_hadd_ps(partialSolution, partialSolution2);
/* pre-SSE3 solution
__m128 temp = _mm_add_ps(_mm_movehl_ps(foo128, foo128), foo128);
float x;
_mm_store_ss(&x, _mm_add_ss(temp, _mm_shuffle_ps(temp, 1)));
*/
partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
partialSolution = _mm_hadd_ps(partialSolution, partialSolution);
_mm_store_ss(inputs[layer]+j,partialSolution);
@@ -120,7 +122,7 @@ void FeedForwardNetworkQuick::solvePart(float *newSolution, register size_t begi
Solution FeedForwardNetworkQuick::solve(const Problem& p)
{
std::vector<bool> solution(p);
std::vector<float> solution(p);
register float* sol=sums[0];//new bool[solution.size()];
for(size_t i=0;i<solution.size();i++)
@@ -132,11 +134,10 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
for(register size_t i=0;i<layers;i++)
{
float* newSolution= sums[i+1];//new bool[layerSizes[i]];
if(threads > 1 && (layerSizes[i] > 700 ||prevSize > 700)) // 600 is an guess about actual size, when creating thread has some speedup
if(threads > 1 && (layerSizes[i] > 700 ||prevSize > 700)) // 700 is an guess about actual size, when creating thread has some speedup
{
std::vector<std::thread> th;
size_t s=1;
//TODO THIS IS NOT WORKING!!!
size_t step =layerSizes[i]/threads;
for(size_t t=1;t<=threads;t++)
{
@@ -158,7 +159,7 @@ Solution FeedForwardNetworkQuick::solve(const Problem& p)
prevSize=layerSizes[i];
sol=newSolution;
}
std::vector<double> ret;
std::vector<float> ret;
for(size_t i=1;i<prevSize;i++)
{
ret.push_back(sol[i]);

View File

@@ -17,6 +17,7 @@
#include <xmmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#include "../sse_mathfun.h"
#define LAMBDA 0.8
@@ -34,7 +35,7 @@ namespace NeuronNetwork
FFNeuron(float &pot, float *w, float &s, float &i,float lam):potential(pot),weights(w),sum(s),inputs(i),lambda(lam) { }
float getPotential() {return potential;}
void setPotential(double p) { potential=p;}
void setPotential(float p) { potential=p;}
float getWeight(unsigned int i ) { return weights[i];}
void setWeight(unsigned int i,float p) { weights[i]=p; }
inline float output() const { return sum; }

View File

@@ -6,60 +6,72 @@ Shin::NeuronNetwork::Learning::BackPropagation::BackPropagation(FeedForwardNetwo
}
// Releases the per-layer delta table owned by this object.
Shin::NeuronNetwork::Learning::BackPropagation::~BackPropagation()
{
    // deltas stays nullptr until propagate() allocates it, so there may be
    // nothing to free.
    if(deltas==nullptr)
        return;

    // Free every per-layer row first, then the array of row pointers itself.
    for(size_t layer=0;layer<network.size();++layer)
    {
        delete[] deltas[layer];
    }
    delete[] deltas;
}
void Shin::NeuronNetwork::Learning::BackPropagation::propagate(const Shin::NeuronNetwork::Solution& expectation)
{
float **deltas;
deltas=new float*[network.size()];
for(int i=(int)network.size()-1;i>=0;i--)
if(deltas==nullptr)
{
deltas[i]=new float[network[i]->size()];
deltas[i][0]=0.0;
if(i==(int)network.size()-1)
deltas=new float*[network.size()];
for(size_t i=0;i<network.size();i++)
{
for(size_t j=1;j<network[i]->size();j++)
deltas[i]=new float[network[i]->size()];
deltas[i][0]=0.0;
}
}
for(size_t j=1;j<network[network.size()-1]->size();j++)
{
deltas[network.size()-1][j]= correction(expectation[j-1],network[network.size()-1]->operator[](j)->output())
*network[network.size()-1]->operator[](j)->derivatedOutput();
}
for(int i=(int)network.size()-2;i>=0;i--)
{
if(allowThreads)
{
std::vector<std::thread> th;
size_t s=0;
//TODO THIS IS NOT WORKING!!!
#define THREADS 4
int step =network[i]->size()/THREADS;
for(int t=1;t<=THREADS;t++)
{
deltas[i][j]= (expectation[j-1]-network[i]->operator[](j)->output())*network[i]->operator[](j)->derivatedOutput();
// std::cerr << "X "<< deltas[i][j] <" Z ";
if(s>=network[i]->size())
break;
th.push_back(std::thread([&i,this](size_t from, size_t to)->void{
for(size_t j=from;j<to;j++)
{
register float deltasWeight = 0;
for(size_t k=1;k<this->network[i+1]->size();k++)
{
deltasWeight+=deltas[i+1][k]*this->network[i+1]->operator[](k)->getWeight(j);
}
//deltas[i][j]*=this->network[i]->operator[](j)->derivatedOutput(); // WHY THE HELL IS SEQ here??
}
},s,t==THREADS?network[i]->size():s+step));//{}
s+=step;
}
for (auto& thr : th)
thr.join();
}else
{
if(allowThreads)
for(size_t j=0;j<network[i]->size();j++)
{
std::vector<std::thread> th;
int s=0;
//TODO THIS IS NOT WORKING!!!
#define THREADS 4
int step =network[i]->size()/THREADS;
for(int t=1;t<=THREADS;t++)
register float deltasWeight = 0;
for(size_t k=1;k<this->network[i+1]->size();k++)
{
if(s>=network[i]->size())
break;
th.push_back(std::thread([&i,this,&deltas](size_t from, size_t to)->void{
for(size_t j=from;j<to;j++)
{
register float deltasWeight = 0;
for(size_t k=1;k<this->network[i+1]->size();k++)
{
deltasWeight+=deltas[i+1][k]*this->network[i+1]->operator[](k)->getWeight(j);
}
//deltas[i][j]*=this->network[i]->operator[](j)->derivatedOutput(); // WHY THE HELL IS SEQ here??
}
},s,t==THREADS?network[i]->size():s+step));//{}
s+=step;
}
for (auto& thr : th)
thr.join();
}else
{
for(size_t j=0;j<network[i]->size();j++)
{
register float deltasWeight = 0;
for(size_t k=1;k<this->network[i+1]->size();k++)
{
deltasWeight+=deltas[i+1][k]*this->network[i+1]->operator[](k)->getWeight(j);
}
deltas[i][j]=deltasWeight*this->network[i]->operator[](j)->derivatedOutput();
deltasWeight+=deltas[i+1][k]*this->network[i+1]->operator[](k)->getWeight(j);
}
deltas[i][j]=deltasWeight*this->network[i]->operator[](j)->derivatedOutput();
}
}
}
@@ -83,11 +95,6 @@ void Shin::NeuronNetwork::Learning::BackPropagation::propagate(const Shin::Neuro
}
}
}
for(size_t i=0;i<network.size();i++)
{
delete[] deltas[i];
}
delete[] deltas;
}
@@ -96,7 +103,7 @@ float Shin::NeuronNetwork::Learning::BackPropagation::teach(const Shin::NeuronNe
Shin::NeuronNetwork::Solution a=network.solve(p);
double error=calculateError(solution,a);
std::vector<double> s;
std::vector<float> s;
if(entropy)
{
for(size_t i=0;i<solution.size();i++)
@@ -120,3 +127,8 @@ void Shin::NeuronNetwork::Learning::BackPropagation::setLearningCoeficient(float
learningCoeficient=c;
}
// Returns the signed difference between the expected value and the computed
// one. propagate() multiplies this by the neuron's derivatedOutput() to seed
// the deltas of the last layer.
float Shin::NeuronNetwork::Learning::BackPropagation::correction(float expected, float computed)
{
    const float difference=expected-computed;
    return difference;
}

View File

@@ -30,6 +30,11 @@ namespace Learning
{
public:
BackPropagation(FeedForwardNetworkQuick &n);
virtual ~BackPropagation();
BackPropagation(const Shin::NeuronNetwork::Learning::BackPropagation&) =delete;
// Copy assignment is disabled because the object owns the raw deltas table.
// Declared with the canonical reference return type.
BackPropagation& operator=(const Shin::NeuronNetwork::Learning::BackPropagation&) =delete;
virtual void propagate(const Shin::NeuronNetwork::Solution& expectation);
float teach(const Shin::NeuronNetwork::Problem &p,const Solution &solution);
@@ -38,10 +43,12 @@ namespace Learning
void setEntropySize(int milipercents) { entropySize=milipercents; }
inline void allowThreading() {allowThreads=1; }
protected:
virtual float correction(float expected, float computed);
float learningCoeficient=0.4;
bool entropy=0;
bool allowThreads=0;
int entropySize=500;
float **deltas=nullptr;
};
}
}

View File

@@ -5,61 +5,9 @@ Shin::NeuronNetwork::Learning::OpticalBackPropagation::OpticalBackPropagation(Fe
setEntropySize(100);
}
void Shin::NeuronNetwork::Learning::OpticalBackPropagation::propagate(const Shin::NeuronNetwork::Solution& expectation)
float Shin::NeuronNetwork::Learning::OpticalBackPropagation::correction(float expected, float computed)
{
double **deltas;
deltas=new double*[network.size()];
for(int i=(int)network.size()-1;i>=0;i--)
{
deltas[i]=new double[network[i]->size()];
deltas[i][0]=0.0;
if(i==(int)network.size()-1)
{
for(size_t j=1;j<network[i]->size();j++)
{
register double tmp=(expectation[j-1]-network[i]->operator[](j)->output());
deltas[i][j]= (1+exp(tmp*tmp))*network[i]->operator[](j)->derivatedOutput();
if(tmp <0)
{
deltas[i][j]=-deltas[i][j];
}
}
}else
{
for(size_t j=1;j<network[i]->size();j++)
{
register double deltasWeight = 0;
for(size_t k=1;k<network[i+1]->size();k++)
{
deltasWeight+=deltas[i+1][k]*network[i+1]->operator[](k)->getWeight(j);
}
deltas[i][j]=deltasWeight*network[i]->operator[](j)->derivatedOutput();
}
}
}
for(size_t i=0;i<network.size();i++)
{
size_t max;
if(i==0)
max=network[i]->size();
else
max=network[i-1]->size();
for(size_t j=1;j<network[i]->size();j++)
{
network[i]->operator[](j)->setWeight(0,network[i]->operator[](j)->getWeight(0)+deltas[i][j]*learningCoeficient);
for(size_t k=1;k<max;k++)
{
network[i]->operator[](j)->setWeight(k,
network[i]->operator[](j)->getWeight(k)+learningCoeficient* deltas[i][j]*
(i==0? network.sums[0][k]:(double)network[i-1]->operator[](k)->output()));
}
}
}
for(size_t i=0;i<network.size();i++)
{
delete[] deltas[i];
}
delete[] deltas;
register float tmp=(expected-computed);
register float ret=1+exp(tmp*tmp);
return tmp < 0? -ret:ret;
}

View File

@@ -24,8 +24,8 @@ namespace Learning
{
public:
OpticalBackPropagation(FeedForwardNetworkQuick &n);
virtual void propagate(const Shin::NeuronNetwork::Solution& expectation) override;
protected:
virtual float correction(float expected, float computed) override;
};
}
}

View File

@@ -2,7 +2,7 @@
Shin::NeuronNetwork::Learning::Reinforcement::Reinforcement(Shin::NeuronNetwork::FeedForwardNetworkQuick& n): Unsupervised(n), p(new BackPropagation(n))
{
p->setLearningCoeficient(9);
p->setLearningCoeficient(1);
}
Shin::NeuronNetwork::Learning::Reinforcement::~Reinforcement()
@@ -10,39 +10,43 @@ Shin::NeuronNetwork::Learning::Reinforcement::~Reinforcement()
delete p;
}
void Shin::NeuronNetwork::Learning::Reinforcement::setQualityFunction(std::function< double(const Problem&,const Solution&) > f)
void Shin::NeuronNetwork::Learning::Reinforcement::setQualityFunction(std::function< float(const Problem&,const Solution&) > f)
{
qualityFunction=f;
}
double Shin::NeuronNetwork::Learning::Reinforcement::learn(const Shin::NeuronNetwork::Problem& problem)
float Shin::NeuronNetwork::Learning::Reinforcement::learn(const Shin::NeuronNetwork::Problem& problem)
{
//network[2]->operator[](0)->setWeight(0,-5);
Solution s=network.solve(problem);
double quality=qualityFunction(problem,s);
std::vector<double> q;
float quality=qualityFunction(problem,s);
std::vector<float> q;
//std::cerr << s[0] << "\n";
for(register size_t j=0;j<s.size();j++)
{
q.push_back(s[j]*((double)(990+(rand()%21))/1000.0));
q.push_back(s[j]);//*((float)(990+(rand()%21))/1000.0));
}
if(quality <= 0)
{
for(register size_t j=0;j<s.size();j++)
{
do{
q[j]=((double)(10+rand()%80))/100.0;
}while(fabs(q[j]-s[j]) < 0.1);
q[j]=((float)(100-(rand()%101)))/100.0;
}
}
for(register int i=abs((int)quality);i>=0;i--)
register int i=abs((int)quality);
if(quality != 0.0 && i==0)
i+=1;
for(;i>=0;i--)
{
p->propagate(q);
network.solve(problem); // resolve problem ??? TOTO: should it be here?
}
return quality;
}
double Shin::NeuronNetwork::Learning::Reinforcement::learnSet(const std::vector< Shin::NeuronNetwork::Problem* >& problems)
float Shin::NeuronNetwork::Learning::Reinforcement::learnSet(const std::vector< Shin::NeuronNetwork::Problem* >& problems)
{
double err=0;
float err=0;
for(Shin::NeuronNetwork::Problem *pr:problems)
{
err+=learn(*pr);

View File

@@ -45,15 +45,15 @@ namespace Learning
Reinforcement(const Reinforcement&) =delete;
Reinforcement& operator=(const Reinforcement&) =delete;
void setQualityFunction(std::function<double(const Problem&,const Solution&)>);
double learn(const Shin::NeuronNetwork::Problem &p);
double learnSet(const std::vector<Shin::NeuronNetwork::Problem*> &);
void setQualityFunction(std::function<float(const Problem&,const Solution&)>);
float learn(const Shin::NeuronNetwork::Problem &p);
float learnSet(const std::vector<Shin::NeuronNetwork::Problem*> &);
void setCoef(double q);
inline BackPropagation& getPropagator() {return *p;};
void setPropagator(BackPropagation *p);
protected:
double learningCoeficient=3;
std::function<double(const Problem&,const Solution&)> qualityFunction=nullptr;
float learningCoeficient=3;
std::function<float(const Problem&,const Solution&)> qualityFunction=nullptr;
BackPropagation *p;
};
}

View File

@@ -19,9 +19,9 @@ Layer::~Layer()
}
Solution Layer::solve(const std::vector<double> &input)
Solution Layer::solve(const std::vector<float> &input)
{
std::vector <double> ret;
std::vector <float> ret;
for(Neuron *n:neurons)
{
ret.push_back(n->output(input));

View File

@@ -42,7 +42,7 @@ namespace NeuronNetwork
}
}
~Layer();
Solution solve(const std::vector<double> &input);
Solution solve(const std::vector<float> &input);
Neuron* operator[](int neuron) const;
int size() const {return neurons.size();};
protected:

View File

@@ -6,17 +6,17 @@ Neuron::Neuron(): potential(1),weights()
{
}
double Neuron::getPotential() const
float Neuron::getPotential() const
{
return potential;
}
void Neuron::setPotential(double p)
void Neuron::setPotential(float p)
{
potential=p;
}
double Neuron::getWeight(unsigned int i) const
float Neuron::getWeight(unsigned int i) const
{
if(i >= weights.size())
{
@@ -25,7 +25,7 @@ double Neuron::getWeight(unsigned int i) const
return weights[0];
}
void Neuron::setWeight(unsigned int i,double p)
void Neuron::setWeight(unsigned int i,float p)
{
if(i >= weights.size())
{
@@ -36,9 +36,9 @@ void Neuron::setWeight(unsigned int i,double p)
weights[i]=p;
}
double Neuron::output(std::vector<double> input)
float Neuron::output(std::vector<float> input)
{
register double sum=0;
register float sum=0;
for(unsigned int i=0;i<input.size();i++)
{
// std::cerr << "W: " << getWeight(i) <<"\n";

View File

@@ -16,18 +16,18 @@ namespace NeuronNetwork
{
}
double getPotential() const;
void setPotential(double p);
double getWeight(unsigned int) const;
void setWeight(unsigned int i,double p);
double output(const std::vector<double>);
double output() { return lastOutput;}
float getPotential() const;
void setPotential(float p);
float getWeight(unsigned int) const;
void setWeight(unsigned int i,float p);
float output(const std::vector<float>);
float output() { return lastOutput;}
protected:
double potential;
private:
std::vector<double> weights;
double lastOutput=0.0;
double lastInput=0.0;
std::vector<float> weights;
float lastOutput=0.0;
float lastInput=0.0;
};
class SimpleNeuron: public Neuron
{

View File

@@ -7,7 +7,7 @@ Problem::Problem()
}
Problem::operator std::vector<bool>() const
Problem::operator std::vector<float>() const
{
return representation();
}

View File

@@ -13,8 +13,8 @@ namespace NeuronNetwork
public:
Problem();
virtual ~Problem(){};
operator std::vector<bool>() const;
virtual std::vector<bool> representation() const =0;
operator std::vector<float>() const;
virtual std::vector<float> representation() const =0;
protected:
private:
};

View File

@@ -2,21 +2,21 @@
using namespace Shin::NeuronNetwork;
Solution::Solution(std::vector<double>sol):solution(sol)
Solution::Solution(std::vector<float>sol):solution(sol)
{
}
Solution::Solution(const Problem& p):solution()
{
std::vector<bool> q(p);
for(bool s:q)
std::vector<float> q(p);
for(float s:q)
{
solution.push_back(s);
}
}
double Solution::operator[](size_t pos) const
float Solution::operator[](size_t pos) const
{
return solution[pos];
}
@@ -26,7 +26,7 @@ size_t Solution::size() const
return solution.size();
}
Solution::operator std::vector<double>()
Solution::operator std::vector<float>()
{
return solution;
}

View File

@@ -13,12 +13,12 @@ namespace NeuronNetwork
{
public:
Solution(const Problem& p);
Solution(std::vector<double> solution);
Solution(std::vector<float> solution);
size_t size() const;
double operator[] (size_t pos) const;
operator std::vector<double>();
float operator[] (size_t pos) const;
operator std::vector<float>();
protected:
std::vector<double> solution;
std::vector<float> solution;
};
}
}