iRprop+ implementation

2016-11-01 21:38:56 +01:00
parent 913a5cc41f
commit 173cfc9789
2 changed files with 116 additions and 0 deletions

View File: NeuralNetwork/Learning/iRPropPlus.h

@@ -0,0 +1,64 @@
#pragma once

#include "BatchPropagation.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

namespace NeuralNetwork {
namespace Learning {

/** @class iRPropPlus
 *  @brief Resilient propagation with weight backtracking (iRprop+).
 */
class iRPropPlus : public BatchPropagation {
public:
    iRPropPlus(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()) :
        BatchPropagation(feedForwardNetwork, correction) {
    }

    iRPropPlus(const iRPropPlus&) = delete;
    iRPropPlus& operator=(const iRPropPlus&) = delete;

    void setInitialWeightChange(float initVal) {
        initialWeightChange = initVal;
    }

    /// iRprop+ derives its step sizes from the gradient signs alone, so the
    /// learning coefficient of the base interface is intentionally ignored.
    void setLearningCoefficient(float) {
    }

protected:
    virtual inline void resize() override {
        BatchPropagation::resize();

        // Mirror the gradient structure for the per-weight state kept between batches.
        _lastGradients = _gradients;

        // Per-weight step sizes, initialised to the configured starting value.
        _changesOfWeightChanges = _lastGradients;
        for (std::size_t i = 1; i < _network.size(); i++) {
            for (std::size_t j = 0; j < _changesOfWeightChanges[i].size(); j++) {
                std::fill(_changesOfWeightChanges[i][j].begin(), _changesOfWeightChanges[i][j].end(), initialWeightChange);
            }
        }

        // Last applied weight changes, needed for the backtracking step.
        _lastWeightChanges = _lastGradients;
        for (std::size_t i = 1; i < _network.size(); i++) {
            for (std::size_t j = 0; j < _lastWeightChanges[i].size(); j++) {
                std::fill(_lastWeightChanges[i][j].begin(), _lastWeightChanges[i][j].end(), 0.1f);
            }
        }
    }

    void updateWeightsAndEndBatch() override;

    std::vector<std::vector<std::vector<float>>> _lastGradients = {};
    std::vector<std::vector<std::vector<float>>> _lastWeightChanges = {};
    std::vector<std::vector<std::vector<float>>> _changesOfWeightChanges = {};

    float _prevError = 0;
    float maxChangeOfWeights = 5;
    float minChangeOfWeights = 0.0001;
    float initialWeightChange = 0.02;
    float weightChangePlus = 1.2;
    float weightChangeMinus = 0.5;
};

}
}
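
As an aside, the constants above (1.2, 0.5, 5, 0.0001, 0.02) drive the per-weight step-size adaptation that the update routine in the second file applies. A minimal self-contained sketch of just that adaptation, with illustrative names that are not part of the class:

#include <algorithm>
#include <iostream>

// Illustrative helper mirroring the step-size adaptation used by iRPropPlus:
// grow the step while the gradient keeps its sign, shrink it on a sign flip.
float adaptStepSize(float step, float gradient, float lastGradient) {
    const float plus = 1.2f, minus = 0.5f, maxStep = 5.0f, minStep = 0.0001f;
    if (gradient * lastGradient > 0)
        return std::min(step * plus, maxStep);
    if (gradient * lastGradient < 0)
        return std::max(step * minus, minStep);
    return step; // one of the gradients is zero: keep the step unchanged
}

int main() {
    float step = 0.02f;                       // same starting value as initialWeightChange
    step = adaptStepSize(step, -0.3f, -0.1f); // stable sign -> 0.024
    step = adaptStepSize(step, 0.2f, -0.3f);  // sign flip   -> 0.012
    std::cout << step << "\n";
    return 0;
}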

View File

@@ -0,0 +1,52 @@
#include <NeuralNetwork/Learning/iRPropPlus.h>

#include <algorithm>
#include <cmath>
#include <cstddef>

void NeuralNetwork::Learning::iRPropPlus::updateWeightsAndEndBatch() {
    // Average the output-layer slopes into the batch error used for backtracking.
    float error = 0.0f;
    const auto &outputLayer = _network[_network.size() - 1];
    for (std::size_t j = 1; j < outputLayer.size(); j++) {
        error += _slopes[_network.size() - 1][j];
    }
    error /= outputLayer.size();

    for (std::size_t layerIndex = 1; layerIndex < _network.size(); layerIndex++) {
        auto &layer = _network[layerIndex];
        auto &prevLayer = _network[layerIndex - 1];
        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();

        for (std::size_t j = 1; j < layerSize; j++) {
            for (std::size_t k = 0; k < prevLayerSize; k++) {
                float gradient = _gradients[layerIndex][j][k];
                float lastGradient = _lastGradients[layerIndex][j][k];
                _lastGradients[layerIndex][j][k] = gradient;

                float weightChangeDelta = _changesOfWeightChanges[layerIndex][j][k];
                float delta;

                if (gradient * lastGradient > 0) {
                    // Gradient kept its sign: enlarge the step size and apply the step.
                    weightChangeDelta = std::min(weightChangeDelta * weightChangePlus, maxChangeOfWeights);
                    delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
                    layer[j].weight(k) -= delta;
                } else if (gradient * lastGradient < 0) {
                    // Sign flip: shrink the step size and, if the batch error grew, revert the previous step.
                    weightChangeDelta = std::max(weightChangeDelta * weightChangeMinus, minChangeOfWeights);
                    delta = _lastWeightChanges[layerIndex][j][k];
                    if (error > _prevError) {
                        layer[j].weight(k) += delta;
                    }
                    // Zero the stored gradient so the next batch takes the neutral branch.
                    _lastGradients[layerIndex][j][k] = 0;
                } else {
                    // One of the gradients is zero: apply the step with an unchanged step size.
                    delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
                    layer[j].weight(k) -= delta;
                }

                _changesOfWeightChanges[layerIndex][j][k] = weightChangeDelta;
                _lastWeightChanges[layerIndex][j][k] = delta;
            }
        }
    }

    _prevError = error;
}
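
For reference, the same three-branch rule can be exercised in isolation. The sketch below minimizes f(w) = (w - 3)^2 for a single weight; it illustrates iRprop+ with backtracking and is not part of the library. It uses the textbook sign convention (step against the derivative), whereas the sign handling of _gradients above is specific to this code base.

#include <algorithm>
#include <iostream>

// Standalone iRprop+ on a single weight, minimizing f(w) = (w - 3)^2.
// Step against the derivative; revert the last step when the derivative
// changes sign and the error grew (the backtracking part of iRprop+).
int main() {
    float w = 0.0f, step = 0.02f, lastGrad = 0.0f, lastDelta = 0.0f, prevError = 1e30f;
    const float plus = 1.2f, minus = 0.5f, maxStep = 5.0f, minStep = 0.0001f;

    for (int it = 0; it < 200; ++it) {
        float error = (w - 3.0f) * (w - 3.0f);
        float grad = 2.0f * (w - 3.0f);      // df/dw
        float delta = 0.0f;

        if (grad * lastGrad > 0) {           // stable sign: grow step, move against the gradient
            step = std::min(step * plus, maxStep);
            delta = (grad > 0 ? -1.0f : 1.0f) * step;
            w += delta;
        } else if (grad * lastGrad < 0) {    // sign flip: shrink step, maybe revert the last move
            step = std::max(step * minus, minStep);
            if (error > prevError) w -= lastDelta;
            delta = 0.0f;
            grad = 0.0f;                     // forces the neutral branch next iteration
        } else {                             // neutral branch (first iteration or after a flip)
            delta = (grad > 0 ? -1.0f : (grad < 0 ? 1.0f : 0.0f)) * step;
            w += delta;
        }

        lastGrad = grad;
        lastDelta = delta;
        prevError = error;
    }

    std::cout << "w converged near " << w << "\n"; // expected: close to 3
    return 0;
}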