backprop: momentums + decay, quickprop: renaming
This commit is contained in:
@@ -31,6 +31,22 @@ namespace Learning {
|
|||||||
|
|
||||||
inline virtual void setLearningCoefficient (const float& coefficient) { learningCoefficient=coefficient; }
|
inline virtual void setLearningCoefficient (const float& coefficient) { learningCoefficient=coefficient; }
|
||||||
|
|
||||||
|
float getMomentumWeight() const {
|
||||||
|
return momentumWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setMomentumWeight(const float& m) {
|
||||||
|
momentumWeight=m;
|
||||||
|
}
|
||||||
|
|
||||||
|
float getWeightDecay() const {
|
||||||
|
return weightDecay;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setWeightDecay(const float& wd) {
|
||||||
|
weightDecay=wd;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
virtual inline void resize() {
|
virtual inline void resize() {
|
||||||
@@ -41,11 +57,25 @@ namespace Learning {
|
|||||||
if(slopes[i].size()!=network[i].size())
|
if(slopes[i].size()!=network[i].size())
|
||||||
slopes[i].resize(network[i].size());
|
slopes[i].resize(network[i].size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(lastDeltas.size()!=network.size())
|
||||||
|
lastDeltas.resize(network.size());
|
||||||
|
|
||||||
|
for(std::size_t i=0; i < network.size(); i++) {
|
||||||
|
if(lastDeltas[i].size()!=network[i].size()) {
|
||||||
|
lastDeltas[i].resize(network[i].size());
|
||||||
|
|
||||||
|
for(std::size_t j = 0; j < lastDeltas[i].size(); j++) {
|
||||||
|
lastDeltas[i][j] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deltas= lastDeltas;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void updateWeights(const std::vector<float> &input);
|
virtual void updateWeights(const std::vector<float> &input);
|
||||||
|
|
||||||
virtual void computeDeltas(const std::vector<float> &expectation);
|
virtual void computeSlopes(const std::vector<float> &expectation);
|
||||||
|
|
||||||
FeedForward::Network &network;
|
FeedForward::Network &network;
|
||||||
|
|
||||||
@@ -53,7 +83,13 @@ namespace Learning {
|
|||||||
|
|
||||||
float learningCoefficient;
|
float learningCoefficient;
|
||||||
|
|
||||||
|
float momentumWeight = 0.0;
|
||||||
|
|
||||||
|
float weightDecay = 0.0;
|
||||||
|
|
||||||
std::vector<std::vector<float>> slopes;
|
std::vector<std::vector<float>> slopes;
|
||||||
|
std::vector<std::vector<float>> deltas;
|
||||||
|
std::vector<std::vector<float>> lastDeltas;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -36,36 +36,24 @@ namespace NeuralNetwork {
|
|||||||
slopes[i].resize(network[i].size());
|
slopes[i].resize(network[i].size());
|
||||||
}
|
}
|
||||||
|
|
||||||
if(previousSlopes.size()!=network.size())
|
if(deltas.size()!=network.size())
|
||||||
previousSlopes.resize(network.size());
|
deltas.resize(network.size());
|
||||||
|
|
||||||
for(std::size_t i=0; i < network.size(); i++) {
|
for(std::size_t i=0; i < network.size(); i++) {
|
||||||
if(previousSlopes[i].size()!=network[i].size())
|
if(deltas[i].size()!=network[i].size())
|
||||||
previousSlopes[i].resize(network[i].size());
|
deltas[i].resize(network[i].size());
|
||||||
|
|
||||||
for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
|
for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
|
||||||
previousSlopes[i][j]=1.0;
|
deltas[i][j]=1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
weightChange= deltas;
|
||||||
if(lastWeightChange.size()!=network.size())
|
|
||||||
lastWeightChange.resize(network.size());
|
|
||||||
|
|
||||||
for(std::size_t i=0; i < network.size(); i++) {
|
|
||||||
if(lastWeightChange[i].size()!=network[i].size())
|
|
||||||
lastWeightChange[i].resize(network[i].size());
|
|
||||||
|
|
||||||
for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
|
|
||||||
lastWeightChange[i][j]=1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
weightChange= lastWeightChange;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void updateWeights(const std::vector<float> &input) override;
|
virtual void updateWeights(const std::vector<float> &input) override;
|
||||||
|
|
||||||
std::vector<std::vector<float>> previousSlopes ={};
|
std::vector<std::vector<float>> previousSlopes ={};
|
||||||
std::vector<std::vector<float>> lastWeightChange ={};
|
std::vector<std::vector<float>> deltas ={};
|
||||||
std::vector<std::vector<float>> weightChange ={};
|
std::vector<std::vector<float>> weightChange ={};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,9 +9,11 @@ void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &i
|
|||||||
|
|
||||||
resize();
|
resize();
|
||||||
|
|
||||||
computeDeltas(expectation);
|
computeSlopes(expectation);
|
||||||
|
|
||||||
updateWeights(input);
|
updateWeights(input);
|
||||||
|
|
||||||
|
std::swap(deltas,lastDeltas);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -28,21 +30,25 @@ void NeuralNetwork::Learning::BackPropagation::updateWeights(const std::vector<f
|
|||||||
|
|
||||||
float delta =slopes[layerIndex][j]*learningCoefficient;
|
float delta =slopes[layerIndex][j]*learningCoefficient;
|
||||||
|
|
||||||
layer[j].weight(0)+=delta;
|
//momentum
|
||||||
|
delta += momentumWeight * lastDeltas[layerIndex][j];
|
||||||
|
|
||||||
|
deltas[layerIndex][j]=delta;
|
||||||
|
|
||||||
|
layer[j].weight(0)+=delta - weightDecay *layer[j].weight(0);
|
||||||
|
|
||||||
for(std::size_t k=1;k<prevLayerSize;k++) {
|
for(std::size_t k=1;k<prevLayerSize;k++) {
|
||||||
if(layerIndex==1) {
|
if(layerIndex==1) {
|
||||||
layer[j].weight(k)+=delta*input[k-1];
|
layer[j].weight(k)+=delta*input[k-1] - weightDecay * layer[j].weight(k);
|
||||||
} else {
|
} else {
|
||||||
layer[j].weight(k)+=delta*prevLayer[k].output();
|
layer[j].weight(k)+=delta*prevLayer[k].output() - weightDecay * layer[j].weight(k);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &expectation) {
|
void NeuralNetwork::Learning::BackPropagation::computeSlopes(const std::vector<float> &expectation) {
|
||||||
auto& outputLayer=network[network.size()-1];
|
auto& outputLayer=network[network.size()-1];
|
||||||
for(std::size_t j=1;j<outputLayer.size();j++) {
|
for(std::size_t j=1;j<outputLayer.size();j++) {
|
||||||
auto& neuron = outputLayer[j];
|
auto& neuron = outputLayer[j];
|
||||||
|
|||||||
@@ -18,17 +18,17 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
|
|||||||
|
|
||||||
float newChange=0;
|
float newChange=0;
|
||||||
|
|
||||||
if(fabs (lastWeightChange[layerIndex][j])> 0.0001) {
|
if(fabs (deltas[layerIndex][j])> 0.0001) {
|
||||||
if(std::signbit(lastWeightChange[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
|
if(std::signbit(deltas[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
|
||||||
newChange+= slopes[layerIndex][j]*_epsilon;
|
newChange+= slopes[layerIndex][j]*_epsilon;
|
||||||
|
|
||||||
if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
|
if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
|
||||||
newChange += _maxChange * lastWeightChange[layerIndex][j];
|
newChange += _maxChange * deltas[layerIndex][j];
|
||||||
}else {
|
}else {
|
||||||
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
|
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
|
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
newChange+= slopes[layerIndex][j]*_epsilon;
|
newChange+= slopes[layerIndex][j]*_epsilon;
|
||||||
@@ -49,5 +49,5 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
|
|||||||
}
|
}
|
||||||
|
|
||||||
slopes.swap(previousSlopes);
|
slopes.swap(previousSlopes);
|
||||||
weightChange.swap(lastWeightChange);
|
weightChange.swap(deltas);
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user