iRprop+ implementation
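In outline (iRprop+ after Igel & Hüsken; field names as in the header below): each weight carries its own step size, adapted from the sign of two successive batch gradients:

    grad(t) * grad(t-1) > 0:  step = min(step * weightChangePlus, maxChangeOfWeights),
                              then move against the gradient sign
    grad(t) * grad(t-1) < 0:  step = max(step * weightChangeMinus, minChangeOfWeights);
                              if the batch error grew, revert the previous weight
                              change (the "+" in iRprop+), and zero the stored gradient
    otherwise:                move against the gradient sign with the unchanged step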
include/NeuralNetwork/Learning/iRPropPlus.h (new file, 64 lines)
@@ -0,0 +1,64 @@
#pragma once

#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

#include "BatchPropagation.h"

namespace NeuralNetwork {
namespace Learning {

/** @class iRPropPlus
 * @brief Resilient propagation with weight backtracking (iRprop+).
 */
class iRPropPlus : public BatchPropagation {

    public:
        iRPropPlus(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
            BatchPropagation(feedForwardNetwork, correction) {
        }

        iRPropPlus(const iRPropPlus&) = delete;
        iRPropPlus& operator=(const iRPropPlus&) = delete;

        void setInitialWeightChange(float initVal) {
            initialWeightChange = initVal;
        }

        /// iRprop+ adapts a step size per weight, so a global learning
        /// coefficient is unused; kept as a no-op for interface compatibility.
        void setLearningCoefficient(float) {
        }

    protected:
        void resize() override {
            BatchPropagation::resize();

            // Mirror the gradient storage shape for the per-weight state.
            _lastGradients = _gradients;

            _changesOfWeightChanges = _lastGradients;
            for(std::size_t i = 1; i < _network.size(); i++) {
                for(std::size_t j = 0; j < _changesOfWeightChanges[i].size(); j++) {
                    std::fill(_changesOfWeightChanges[i][j].begin(), _changesOfWeightChanges[i][j].end(), initialWeightChange);
                }
            }

            _lastWeightChanges = _lastGradients;
            for(std::size_t i = 1; i < _network.size(); i++) {
                for(std::size_t j = 0; j < _lastWeightChanges[i].size(); j++) {
                    std::fill(_lastWeightChanges[i][j].begin(), _lastWeightChanges[i][j].end(), 0.1f);
                }
            }
        }

        void updateWeightsAndEndBatch() override;

        std::vector<std::vector<std::vector<float>>> _lastGradients = {};
        std::vector<std::vector<std::vector<float>>> _lastWeightChanges = {};
        std::vector<std::vector<std::vector<float>>> _changesOfWeightChanges = {};

        float _prevError = 0.0f;

        float maxChangeOfWeights = 5.0f;
        float minChangeOfWeights = 0.0001f;

        float initialWeightChange = 0.02f;
        float weightChangePlus = 1.2f;
        float weightChangeMinus = 0.5f;
};

}
}
src/NeuralNetwork/Learning/iRPropPlus.cpp (new file, 52 lines)
@@ -0,0 +1,52 @@
#include <NeuralNetwork/Learning/iRPropPlus.h>

#include <algorithm>
#include <cmath>
#include <cstddef>

void NeuralNetwork::Learning::iRPropPlus::updateWeightsAndEndBatch() {
    // Aggregate the output-layer slopes into the scalar error that drives
    // the backtracking decision below.
    float error = 0.0f;
    const auto &outputLayer = _network[_network.size() - 1];
    for(std::size_t j = 1; j < outputLayer.size(); j++) {
        error += _slopes[_network.size() - 1][j];
    }

    error /= outputLayer.size();

    for(std::size_t layerIndex = 1; layerIndex < _network.size(); layerIndex++) {
        auto &layer = _network[layerIndex];
        auto &prevLayer = _network[layerIndex - 1];

        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();

        for(std::size_t j = 1; j < layerSize; j++) {
            for(std::size_t k = 0; k < prevLayerSize; k++) {
                float gradient = _gradients[layerIndex][j][k];
                float lastGradient = _lastGradients[layerIndex][j][k];

                _lastGradients[layerIndex][j][k] = gradient;

                float weightChangeDelta = _changesOfWeightChanges[layerIndex][j][k];
                float delta;

                if(gradient * lastGradient > 0) {
                    // Gradient kept its sign: grow the per-weight step, capped at the maximum.
                    weightChangeDelta = std::min(weightChangeDelta * weightChangePlus, maxChangeOfWeights);
                    delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
                    layer[j].weight(k) -= delta;
                } else if(gradient * lastGradient < 0) {
                    // Sign flip: a minimum was overshot. Shrink the step and, if the
                    // error grew, revert the previous weight change (the "+" in iRprop+).
                    weightChangeDelta = std::max(weightChangeDelta * weightChangeMinus, minChangeOfWeights);
                    delta = _lastWeightChanges[layerIndex][j][k];
                    if(error > _prevError) {
                        layer[j].weight(k) += delta;
                    }
                    // Zeroing the stored gradient forces the plain step branch next batch.
                    _lastGradients[layerIndex][j][k] = 0;
                } else {
                    // One of the two gradients is zero: take a plain step.
                    delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
                    layer[j].weight(k) -= delta;
                }

                _changesOfWeightChanges[layerIndex][j][k] = weightChangeDelta;
                _lastWeightChanges[layerIndex][j][k] = delta;
            }
        }
    }

    _prevError = error;
}
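For orientation, a minimal standalone sketch of the same per-weight rule, minimizing the toy error E(w) = (w - 3)^2. Everything here (the names, the main() harness) is illustrative and independent of the classes above; the gradient is the textbook dE/dw, while the library's internal _gradients may follow the opposite sign convention, which is why the delta signs in the commit differ.

#include <algorithm>
#include <cstdio>

int main() {
    float w = 0.0f;             // the single trainable weight
    float step = 0.02f;         // per-weight step size (initialWeightChange above)
    float lastGradient = 0.0f;
    float lastDelta = 0.0f;
    float prevError = 0.0f;

    const float etaPlus = 1.2f, etaMinus = 0.5f;   // weightChangePlus / weightChangeMinus
    const float stepMax = 5.0f, stepMin = 0.0001f; // maxChangeOfWeights / minChangeOfWeights

    for (int t = 0; t < 40; ++t) {
        float error = (w - 3.0f) * (w - 3.0f);     // E(w) = (w - 3)^2
        float gradient = 2.0f * (w - 3.0f);        // dE/dw

        float delta;
        if (gradient * lastGradient > 0) {
            // Same sign as last time: grow the step, move against the gradient.
            step = std::min(step * etaPlus, stepMax);
            delta = (gradient > 0 ? 1.0f : -1.0f) * step;
            w -= delta;
        } else if (gradient * lastGradient < 0) {
            // Sign flip: a minimum was overshot; shrink the step and, if the
            // error grew, undo the previous move (the backtracking "+").
            step = std::max(step * etaMinus, stepMin);
            delta = lastDelta;
            if (error > prevError) w += delta;
            gradient = 0.0f;  // forces the plain-step branch next iteration
        } else {
            delta = (gradient > 0 ? 1.0f : -1.0f) * step;
            w -= delta;
        }

        lastGradient = gradient;
        lastDelta = delta;
        prevError = error;
        std::printf("t=%2d  w=% .4f  E=%.6f  step=%.4f\n", t, w, error, step);
    }
    return 0;
}

The backtracking branch only reverts the previous change when the error actually grew; when it shrank, the step size is merely reduced. That conditional revert is what distinguishes iRprop+ from the original Rprop.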