iRprop+ implementation
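iRprop+ is Igel and Hüsken's improved Rprop with weight backtracking. Each weight w_ij keeps its own step size Δ_ij that grows while the batch gradient keeps its sign and shrinks when it flips; on a flip, the previous weight change is additionally undone if the batch error increased. With g_ij = ∂E/∂w_ij, the step-size schedule implemented by this commit is:

\Delta_{ij}^{(t)} =
\begin{cases}
\min\bigl(\eta^{+}\,\Delta_{ij}^{(t-1)},\ \Delta_{\max}\bigr) & \text{if } g_{ij}^{(t-1)}\, g_{ij}^{(t)} > 0 \\
\max\bigl(\eta^{-}\,\Delta_{ij}^{(t-1)},\ \Delta_{\min}\bigr) & \text{if } g_{ij}^{(t-1)}\, g_{ij}^{(t)} < 0 \\
\Delta_{ij}^{(t-1)} & \text{otherwise}
\end{cases}

where η+ = weightChangePlus = 1.2, η− = weightChangeMinus = 0.5, Δ_max = maxChangeOfWeights = 5 and Δ_min = minChangeOfWeights = 0.0001 (the defaults in the header below). After a sign flip the stored gradient is zeroed, so the following batch falls into the "otherwise" case.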
include/NeuralNetwork/Learning/iRPropPlus.h (new file, 64 lines)
@@ -0,0 +1,64 @@
#pragma once

#include <algorithm>
#include <memory>
#include <vector>

#include "BatchPropagation.h"

namespace NeuralNetwork {
namespace Learning {

/** @class iRPropPlus
 * @brief iRprop+: resilient backpropagation with weight backtracking.
 *
 * Each weight keeps its own step size, adapted from the sign of successive
 * batch gradients; a global learning rate is not used.
 */
class iRPropPlus : public BatchPropagation {

	public:
		iRPropPlus(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
			BatchPropagation(feedForwardNetwork, correction) {
		}

		iRPropPlus(const iRPropPlus&) = delete;
		iRPropPlus& operator=(const iRPropPlus&) = delete;

		void setInitialWeightChange(float initVal) {
			initialWeightChange = initVal;
		}

		/// Intentionally a no-op: Rprop-family methods adapt per-weight
		/// step sizes instead of using a global learning coefficient.
		void setLearningCoefficient(float) {
		}

	protected:
		void resize() override {
			BatchPropagation::resize();

			// Reuse the gradient tensor's shape for all per-weight state.
			_lastGradients = _gradients;

			// Per-weight step sizes start at initialWeightChange.
			_changesOfWeightChanges = _lastGradients;
			for(std::size_t i = 1; i < _network.size(); i++) {
				for(std::size_t j = 0; j < _changesOfWeightChanges[i].size(); j++) {
					std::fill(_changesOfWeightChanges[i][j].begin(), _changesOfWeightChanges[i][j].end(), initialWeightChange);
				}
			}

			// Last applied weight changes, kept for backtracking.
			_lastWeightChanges = _lastGradients;
			for(std::size_t i = 1; i < _network.size(); i++) {
				for(std::size_t j = 0; j < _lastWeightChanges[i].size(); j++) {
					std::fill(_lastWeightChanges[i][j].begin(), _lastWeightChanges[i][j].end(), 0.1f);
				}
			}
		}

		void updateWeightsAndEndBatch() override;

		std::vector<std::vector<std::vector<float>>> _lastGradients = {};
		std::vector<std::vector<std::vector<float>>> _lastWeightChanges = {};
		std::vector<std::vector<std::vector<float>>> _changesOfWeightChanges = {};

		float _prevError = 0;

		float maxChangeOfWeights = 5;       // step-size cap (Delta_max)
		float minChangeOfWeights = 0.0001;  // step-size floor (Delta_min)

		float initialWeightChange = 0.02;   // Delta_0
		float weightChangePlus = 1.2;       // eta+
		float weightChangeMinus = 0.5;      // eta-
};

}
}
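For intuition, here is a minimal self-contained sketch (plain C++, no dependency on this library) of how a single weight's step size evolves under the defaults above; the printed trace is illustrative only:

#include <algorithm>
#include <cstdio>

int main() {
	// Defaults from iRPropPlus.h: Delta_0 = 0.02, eta+ = 1.2, eta- = 0.5,
	// caps [0.0001, 5].
	float delta = 0.02f;
	const float plus = 1.2f, minus = 0.5f, maxD = 5.0f, minD = 0.0001f;

	// Five batches with an unchanged gradient sign: the step grows geometrically.
	for(int t = 0; t < 5; t++) {
		delta = std::min(delta * plus, maxD);
		std::printf("same sign, batch %d: delta = %f\n", t + 1, delta);
	}

	// One sign flip: the step is halved (floored at Delta_min).
	delta = std::max(delta * minus, minD);
	std::printf("after sign flip: delta = %f\n", delta);
}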
src/NeuralNetwork/Learning/iRPropPlus.cpp (new file, 52 lines)
@@ -0,0 +1,52 @@
#include <NeuralNetwork/Learning/iRPropPlus.h>

#include <algorithm>
#include <cmath>

void NeuralNetwork::Learning::iRPropPlus::updateWeightsAndEndBatch() {
	// Batch error proxy: the mean slope of the output layer. It is only
	// compared against the previous batch's value to decide backtracking.
	float error = 0.0f;
	const auto &outputLayer = _network[_network.size() - 1];
	for(std::size_t j = 1; j < outputLayer.size(); j++) {
		error += _slopes[_network.size() - 1][j];
	}

	error /= outputLayer.size() - 1; // index 0 is the bias unit, so average over the remaining neurons

	for(std::size_t layerIndex = 1; layerIndex < _network.size(); layerIndex++) {
		auto &layer = _network[layerIndex];
		auto &prevLayer = _network[layerIndex - 1];

		std::size_t prevLayerSize = prevLayer.size();
		std::size_t layerSize = layer.size();

		for(std::size_t j = 1; j < layerSize; j++) {
			for(std::size_t k = 0; k < prevLayerSize; k++) {
				float gradient = _gradients[layerIndex][j][k];
				float lastGradient = _lastGradients[layerIndex][j][k];

				_lastGradients[layerIndex][j][k] = gradient;

				float weightChangeDelta = _changesOfWeightChanges[layerIndex][j][k];
				float delta;

				if(gradient * lastGradient > 0) {
					// Sign kept: grow the step size (capped at maxChangeOfWeights)
					// and step along the gradient's sign; the descent direction
					// follows the sign convention of _gradients in BatchPropagation.
					weightChangeDelta = std::min(weightChangeDelta * weightChangePlus, maxChangeOfWeights);
					delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
					layer[j].weight(k) -= delta;
				} else if(gradient * lastGradient < 0) {
					// Sign flip: shrink the step size (floored at minChangeOfWeights).
					// iRprop+ reverts the previous weight change only when the error
					// grew, then zeroes the stored gradient so the next batch takes
					// the neutral branch.
					weightChangeDelta = std::max(weightChangeDelta * weightChangeMinus, minChangeOfWeights);
					delta = _lastWeightChanges[layerIndex][j][k];
					if(error > _prevError) {
						layer[j].weight(k) += delta;
					}
					_lastGradients[layerIndex][j][k] = 0;
				} else {
					// One of the two gradients is zero: keep the step size.
					delta = (std::signbit(gradient) ? 1.0f : -1.0f) * weightChangeDelta;
					layer[j].weight(k) -= delta;
				}

				_changesOfWeightChanges[layerIndex][j][k] = weightChangeDelta;
				_lastWeightChanges[layerIndex][j][k] = delta;
			}
		}
	}

	_prevError = error;
}
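A hypothetical usage sketch. Only the constructor, setInitialWeightChange() and updateWeightsAndEndBatch() are confirmed by this diff; how samples are fed to a BatchPropagation learner (teachSample() below) is an assumption about the base class, not a documented API:

#include <NeuralNetwork/Learning/iRPropPlus.h>

void trainOneEpoch(NeuralNetwork::FeedForward::Network &net /*, dataset */) {
	// Constructor as declared in iRPropPlus.h; the default Linear
	// correction function is used.
	NeuralNetwork::Learning::iRPropPlus learner(net);
	learner.setInitialWeightChange(0.02f); // Delta_0

	// Hypothetical: accumulate gradients per sample via the base class,
	// then apply the iRprop+ update once per batch.
	// for(const auto &[input, target] : dataset)
	//     learner.teachSample(input, target);   // assumed base-class call
	// (updateWeightsAndEndBatch() runs at the batch boundary.)
}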