From 8b2a4e89b349badf694ae60bb57f1ea529526bd1 Mon Sep 17 00:00:00 2001
From: Shin
Date: Tue, 1 Nov 2016 21:40:00 +0100
Subject: [PATCH] quickProp implementation after refactoring

---
 .../NeuralNetwork/Learning/QuickPropagation.h  | 30 ++++---
 .../CascadeCorrelation.cpp                     |  2 +
 .../Learning/QuickPropagation.cpp              | 88 +++++++------
 3 files changed, 52 insertions(+), 68 deletions(-)

diff --git a/include/NeuralNetwork/Learning/QuickPropagation.h b/include/NeuralNetwork/Learning/QuickPropagation.h
index 358548a..e790ae7 100644
--- a/include/NeuralNetwork/Learning/QuickPropagation.h
+++ b/include/NeuralNetwork/Learning/QuickPropagation.h
@@ -4,7 +4,7 @@
 #include
 #include
 
-#include "BackPropagation.h"
+#include "BatchPropagation.h"
 
 namespace NeuralNetwork {
     namespace Learning {
@@ -12,27 +12,33 @@ namespace NeuralNetwork {
         /** @class QuickPropagation
          * @brief
          */
-        class QuickPropagation : public BackPropagation {
+        class QuickPropagation : public BatchPropagation {
             public:
 
                 inline QuickPropagation(FeedForward::Network &feedForwardNetwork, std::shared_ptr correction = std::make_shared()):
-                    BackPropagation(feedForwardNetwork,correction) {
+                    BatchPropagation(feedForwardNetwork,correction) {
                 }
 
                 virtual ~QuickPropagation() {
                 }
 
-            protected:
-
-                float _maxChange=1.75;
-                float _epsilon=0.5;
-
-                virtual inline void resize() override {
-                    BackPropagation::resize();
-                    _previousSlopes = _slopes;
+                void setLearningCoefficient (const float& coefficient) {
                 }
 
-                std::vector> _previousSlopes ={};
+            protected:
+
+                virtual void updateWeightsAndEndBatch() override;
+
+                float _maxChange=1.75;
+
+                virtual inline void resize() override {
+                    BatchPropagation::resize();
+                    _lastGradients = _gradients;
+                    _lastDeltas = _gradients;
+                }
+
+                std::vector>> _lastDeltas = {};
+                std::vector>> _lastGradients = {};
         };
     }
 }
\ No newline at end of file
diff --git a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
index f05de73..77ff5e3 100644
--- a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
+++ b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
@@ -1,5 +1,7 @@
 #include
 
+#include
+
 using namespace NeuralNetwork::ConstructiveAlgorihtms;
 
 float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector &patterns) {
diff --git a/src/NeuralNetwork/Learning/QuickPropagation.cpp b/src/NeuralNetwork/Learning/QuickPropagation.cpp
index 3939c50..80e5df5 100644
--- a/src/NeuralNetwork/Learning/QuickPropagation.cpp
+++ b/src/NeuralNetwork/Learning/QuickPropagation.cpp
@@ -1,70 +1,46 @@
 #include
-#include
-#include
+void NeuralNetwork::Learning::QuickPropagation::updateWeightsAndEndBatch() {
 
-void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector &input) {
+    float shrinkFactor=_maxChange/(_maxChange+1.0f);
 
-    float shrinkFactor=_maxChange/(_maxChange+1.0);
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];
 
-    for(std::size_t layerIndex=1;layerIndex 0.0001) {
+                if(std::signbit(_gradients[layerIndex][j][k]) == std::signbit(_lastGradients[layerIndex][j][k])) {
+                    newChange+= _gradients[layerIndex][j][k]*_epsilon;
 
-    for(std::size_t j=1;j 0.0001) {
-        if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
-            newChange+= slopes[layerIndex][j]*_epsilon;
-
-            if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                newChange += _maxChange * gradients[layerIndex][j];
-            }else {
-                newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
+                    if(fabs(_gradients[layerIndex][j][k]) > fabs(shrinkFactor * _lastGradients[layerIndex][j][k])) {
+                        newChange += _maxChange * _gradients[layerIndex][j][k];
+                    }else {
+                        newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                    }
+                } else {
+                    newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
                 }
             } else {
-                newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
+                newChange+= _lastGradients[layerIndex][j][k]*_epsilon;
             }
-    } else {
-        newChange+= slopes[layerIndex][j]*_epsilon;
+            _lastDeltas[layerIndex][j][k]= newChange;
+            layer[j].weight(k)+= newChange;
+
+            /* This is according to paper?
+            // delta = _gradients[layerIndex][j][k] / (_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+            // delta = std::min(_maxChange,delta);
+            _lastDeltas[layerIndex][j][k] = delta;
+            layer[j].weight(k)+= delta;
+            */
         }
-
-        weightChange[layerIndex][j]=newChange;
-
-        layer[j].weight(0)+=newChange;
-
-        for(std::size_t k=1;k
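
Note, for context rather than as part of the patch: the update in updateWeightsAndEndBatch() has the general shape of Fahlman's quickprop rule. Per weight, the step is the quadratic (secant) step grad(t) / (grad(t-1) - grad(t)) * delta(t-1), capped by a maximum growth factor (the role _maxChange = 1.75 plays here), with a plain gradient term scaled by _epsilon mixed in while the gradient keeps its sign, and a plain gradient step as the fallback when the previous delta is near zero. The standalone sketch below restates that textbook rule for a single weight. It is only an illustration: the names quickpropStep, grad, prevGrad, prevDelta, epsilon and mu are hypothetical and not part of this repository, and the growth cap is applied to the previous delta as in the paper rather than mirroring the patch line for line.

// Illustrative sketch of the classic quickprop update for a single weight
// (Fahlman, 1988). Names are hypothetical; this is not the repository's API.
#include <cmath>
#include <cstdio>

// grad      : current slope of the error for this weight, taken here as the
//             descent direction (-dE/dw); the quadratic term is insensitive
//             to this sign convention, the epsilon term is not
// prevGrad  : slope from the previous batch
// prevDelta : weight change applied in the previous batch
// epsilon   : plain gradient-descent learning rate
// mu        : maximum growth factor, e.g. 1.75
float quickpropStep(float grad, float prevGrad, float prevDelta,
                    float epsilon, float mu) {
    const float shrink = mu / (mu + 1.0f);
    float delta = 0.0f;

    if (std::fabs(prevDelta) > 1e-4f) {
        if (std::signbit(grad) == std::signbit(prevGrad)) {
            // Still moving the same way: keep a gradient-descent component.
            delta += epsilon * grad;
            if (std::fabs(grad) > std::fabs(shrink * prevGrad)) {
                // Slope is not shrinking fast enough: cap the step at mu times
                // the previous delta instead of trusting the parabola.
                delta += mu * prevDelta;
            } else {
                // Quadratic (secant) step toward the estimated minimum.
                // A real implementation should guard against prevGrad == grad.
                delta += grad / (prevGrad - grad) * prevDelta;
            }
        } else {
            // The slope changed sign, so the minimum was overshot:
            // the quadratic step moves back between the two points.
            delta += grad / (prevGrad - grad) * prevDelta;
        }
    } else {
        // Previous step was (near) zero: fall back to plain gradient descent.
        delta += epsilon * grad;
    }
    return delta;
}

int main() {
    // Slope shrank from -0.5 to -0.2 and kept its sign after a -0.1 step,
    // so the result combines the epsilon term with the parabola step.
    std::printf("%f\n", quickpropStep(-0.2f, -0.5f, -0.1f, 0.5f, 1.75f));
    return 0;
}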