backprop: momentum + weight decay; quickprop: renaming

2016-09-07 22:26:50 +02:00
parent b6b7703299
commit 9fbcb727a2
4 changed files with 62 additions and 32 deletions

View File

@@ -31,6 +31,22 @@ namespace Learning {
     inline virtual void setLearningCoefficient (const float& coefficient) { learningCoefficient=coefficient; }
+    float getMomentumWeight() const {
+        return momentumWeight;
+    }
+    void setMomentumWeight(const float& m) {
+        momentumWeight=m;
+    }
+    float getWeightDecay() const {
+        return weightDecay;
+    }
+    void setWeightDecay(const float& wd) {
+        weightDecay=wd;
+    }
 protected:
     virtual inline void resize() {
@@ -41,11 +57,25 @@ namespace Learning {
         if(slopes[i].size()!=network[i].size())
             slopes[i].resize(network[i].size());
         }
+        if(lastDeltas.size()!=network.size())
+            lastDeltas.resize(network.size());
+        for(std::size_t i=0; i < network.size(); i++) {
+            if(lastDeltas[i].size()!=network[i].size()) {
+                lastDeltas[i].resize(network[i].size());
+                for(std::size_t j = 0; j < lastDeltas[i].size(); j++) {
+                    lastDeltas[i][j] = 0.0;
+                }
+            }
+        }
+        deltas= lastDeltas;
     }
     virtual void updateWeights(const std::vector<float> &input);
-    virtual void computeDeltas(const std::vector<float> &expectation);
+    virtual void computeSlopes(const std::vector<float> &expectation);
     FeedForward::Network &network;
@@ -53,7 +83,13 @@ namespace Learning {
     float learningCoefficient;
+    float momentumWeight = 0.0;
+    float weightDecay = 0.0;
     std::vector<std::vector<float>> slopes;
+    std::vector<std::vector<float>> deltas;
+    std::vector<std::vector<float>> lastDeltas;
 };
 }
 }
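
The two new members default to 0.0, so existing callers keep plain backpropagation behavior; the setters make both terms tunable from outside. A minimal usage sketch (the constructor shape is assumed from the FeedForward::Network& member, not shown in this diff):

    NeuralNetwork::Learning::BackPropagation trainer(network); // assumed ctor
    trainer.setLearningCoefficient(0.1f);
    trainer.setMomentumWeight(0.9f);   // fraction of the previous step to reuse
    trainer.setWeightDecay(0.0005f);   // per-update shrink applied to every weight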

View File

@@ -36,36 +36,24 @@ namespace NeuralNetwork {
             slopes[i].resize(network[i].size());
         }
-        if(previousSlopes.size()!=network.size())
-            previousSlopes.resize(network.size());
+        if(deltas.size()!=network.size())
+            deltas.resize(network.size());
         for(std::size_t i=0; i < network.size(); i++) {
-            if(previousSlopes[i].size()!=network[i].size())
-                previousSlopes[i].resize(network[i].size());
+            if(deltas[i].size()!=network[i].size())
+                deltas[i].resize(network[i].size());
             for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
-                previousSlopes[i][j]=1.0;
+                deltas[i][j]=1.0;
             }
         }
-        if(lastWeightChange.size()!=network.size())
-            lastWeightChange.resize(network.size());
-        for(std::size_t i=0; i < network.size(); i++) {
-            if(lastWeightChange[i].size()!=network[i].size())
-                lastWeightChange[i].resize(network[i].size());
-            for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
-                lastWeightChange[i][j]=1.0;
-            }
-        }
-        weightChange= lastWeightChange;
+        weightChange= deltas;
     }
     virtual void updateWeights(const std::vector<float> &input) override;
     std::vector<std::vector<float>> previousSlopes ={};
-    std::vector<std::vector<float>> lastWeightChange ={};
+    std::vector<std::vector<float>> deltas ={};
     std::vector<std::vector<float>> weightChange ={};
 };
 }
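
The renamed deltas buffer keeps the old 1.0 fill, which keeps fabs(deltas[layerIndex][j]) above the 0.0001 guard on the first pass, so quickprop starts in its history-using branch rather than the plain-epsilon fallback. A sketch of an equivalent, more compact initialization (illustrative, not the committed code; it bounds on the buffer's own layer width, while the committed loop bounds on previousSlopes[i].size(), which only agrees when both buffers are shaped alike):

    // shape-match deltas to the network and (re)fill with 1.0
    deltas.resize(network.size());
    for (std::size_t i = 0; i < network.size(); i++)
        deltas[i].assign(network[i].size(), 1.0f);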

View File

@@ -9,9 +9,11 @@ void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &i
     resize();
-    computeDeltas(expectation);
+    computeSlopes(expectation);
     updateWeights(input);
+    std::swap(deltas,lastDeltas);
 }
@@ -28,21 +30,25 @@ void NeuralNetwork::Learning::BackPropagation::updateWeights(const std::vector<f
             float delta =slopes[layerIndex][j]*learningCoefficient;
-            layer[j].weight(0)+=delta;
+            //momentum
+            delta += momentumWeight * lastDeltas[layerIndex][j];
+            deltas[layerIndex][j]=delta;
+            layer[j].weight(0)+=delta - weightDecay *layer[j].weight(0);
             for(std::size_t k=1;k<prevLayerSize;k++) {
                 if(layerIndex==1) {
-                    layer[j].weight(k)+=delta*input[k-1];
+                    layer[j].weight(k)+=delta*input[k-1] - weightDecay * layer[j].weight(k);
                 } else {
-                    layer[j].weight(k)+=delta*prevLayer[k].output();
+                    layer[j].weight(k)+=delta*prevLayer[k].output() - weightDecay * layer[j].weight(k);
                 }
             }
         }
     }
 }
-void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &expectation) {
+void NeuralNetwork::Learning::BackPropagation::computeSlopes(const std::vector<float> &expectation) {
     auto& outputLayer=network[network.size()-1];
     for(std::size_t j=1;j<outputLayer.size();j++) {
         auto& neuron = outputLayer[j];
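
Per weight, the new update folds the momentum term into delta, records it in deltas for the next epoch (teach() swaps deltas and lastDeltas after updateWeights), and subtracts weightDecay times the current weight from every weight it touches. A standalone restatement for a single weight (illustrative free function, not part of the project; eta, mu and lambda stand in for learningCoefficient, momentumWeight and weightDecay; x is 1 for the bias weight):

    // one backprop weight update with momentum and weight decay
    inline float updateOne(float w, float slope, float lastDelta, float x,
                           float eta, float mu, float lambda, float &deltaOut) {
        deltaOut = eta * slope + mu * lastDelta; // momentum reuses last epoch's step
        return w + deltaOut * x - lambda * w;    // decay shrinks w toward zero
    }

Subtracting lambda*w on every update is the usual L2-regularization shortcut: it is the same as scaling the weight by (1 - lambda) before the gradient step.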

View File

@@ -18,17 +18,17 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
             float newChange=0;
-            if(fabs (lastWeightChange[layerIndex][j])> 0.0001) {
-                if(std::signbit(lastWeightChange[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
+            if(fabs (deltas[layerIndex][j])> 0.0001) {
+                if(std::signbit(deltas[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
                     newChange+= slopes[layerIndex][j]*_epsilon;
                     if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                        newChange += _maxChange * lastWeightChange[layerIndex][j];
+                        newChange += _maxChange * deltas[layerIndex][j];
                     }else {
-                        newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
+                        newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
                     }
                 } else {
-                    newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
+                    newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
                 }
             } else {
                 newChange+= slopes[layerIndex][j]*_epsilon;
@@ -49,5 +49,5 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
     }
     slopes.swap(previousSlopes);
-    weightChange.swap(lastWeightChange);
+    weightChange.swap(deltas);
 }
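
For reference, the branch above condenses to the classic quickprop step for a single weight: while the previous change and the current slope agree in sign it combines a gradient (epsilon) term with a jump toward the minimum of the parabola fitted through the last two slopes, capped by _maxChange when the slope is not shrinking fast enough; once the sign flips it takes the parabola step alone. A self-contained sketch (s, sPrev and dPrev are the current slope, previous slope and previous change, i.e. slopes, previousSlopes and the renamed deltas; eps, maxGrow and shrink stand in for _epsilon, _maxChange and shrinkFactor; illustrative only):

    #include <cmath>

    float quickpropStep(float s, float sPrev, float dPrev,
                        float eps, float maxGrow, float shrink) {
        if (std::fabs(dPrev) <= 0.0001f)
            return eps * s;                        // no usable history: gradient step
        if (std::signbit(dPrev) == std::signbit(s)) {
            float change = eps * s;                // still descending the same way
            if (std::fabs(s) > std::fabs(shrink * sPrev))
                change += maxGrow * dPrev;         // cap the step's growth
            else
                change += s / (sPrev - s) * dPrev; // parabola jump
            return change;
        }
        return s / (sPrev - s) * dPrev;            // sign flipped: parabola step only
    }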