quickProp implementation after refactoring

2016-11-01 21:40:00 +01:00
parent 173cfc9789
commit 8b2a4e89b3
3 changed files with 52 additions and 68 deletions

View File

@@ -4,7 +4,7 @@
#include <cmath>
#include <NeuralNetwork/FeedForward/Network.h>
#include "BackPropagation.h"
#include "BatchPropagation.h"
namespace NeuralNetwork {
namespace Learning {
@@ -12,27 +12,33 @@ namespace NeuralNetwork {
/** @class QuickPropagation
* @brief Quickprop learning rule (Fahlman, 1988) applied per batch to a feed-forward network.
*/
class QuickPropagation : public BackPropagation {
class QuickPropagation : public BatchPropagation {
public:
inline QuickPropagation(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
BackPropagation(feedForwardNetwork,correction) {
BatchPropagation(feedForwardNetwork,correction) {
}
virtual ~QuickPropagation() {
}
protected:
float _maxChange=1.75;
float _epsilon=0.5;
virtual inline void resize() override {
BackPropagation::resize();
_previousSlopes = _slopes;
void setLearningCoefficient (const float& coefficient) {
}
std::vector<std::vector<float>> _previousSlopes ={};
protected:
virtual void updateWeightsAndEndBatch() override;
float _maxChange=1.75;
virtual inline void resize() override {
BatchPropagation::resize();
_lastGradients = _gradients;
_lastDeltas = _gradients;
}
std::vector<std::vector<std::vector<float>>> _lastDeltas = {};
std::vector<std::vector<std::vector<float>>> _lastGradients = {};
};
}
}
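For orientation, the new members correspond to the quantities in Fahlman's Quickprop rule (1988), which this class appears to implement in batch form: _gradients and _lastGradients hold the accumulated error slopes of the current and previous batch, _lastDeltas the previous weight steps, and _maxChange the growth limit mu. In one common formulation the per-weight step is

\Delta w_{ij}(t) = \frac{S_{ij}(t)}{S_{ij}(t-1) - S_{ij}(t)} \, \Delta w_{ij}(t-1), \qquad S_{ij} = \frac{\partial E}{\partial w_{ij}},

with the magnitude of the step capped near \mu \, |\Delta w_{ij}(t-1)| (here \mu = _maxChange = 1.75) and a small gradient-descent term \varepsilon S_{ij}(t) mixed in while the slope keeps its sign.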

View File

@@ -1,5 +1,7 @@
#include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
#include <NeuralNetwork/Learning/BackPropagation.h>
using namespace NeuralNetwork::ConstructiveAlgorihtms;
float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector <CascadeCorrelation::TrainingPattern> &patterns) {

View File

@@ -1,70 +1,46 @@
#include <NeuralNetwork/Learning/QuickPropagation.h>
#include <cassert>
#include <immintrin.h>
void NeuralNetwork::Learning::QuickPropagation::updateWeightsAndEndBatch() {
void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {
float shrinkFactor=_maxChange/(_maxChange+1.0f);
float shrinkFactor=_maxChange/(_maxChange+1.0);
for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
auto &layer = _network[layerIndex];
auto &prevLayer = _network[layerIndex - 1];
for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
auto &layer=network[layerIndex];
auto &prevLayer=network[layerIndex-1];
std::size_t prevLayerSize = prevLayer.size();
std::size_t layerSize = layer.size();
std::size_t prevLayerSize=prevLayer.size();
std::size_t layerSize=layer.size();
for(std::size_t j = 1; j < layerSize; j++) {
for(std::size_t k = 0; k < prevLayerSize; k++) {
float newChange = 0.0f;
float _epsilon = 0.9;
if(fabs (_gradients[layerIndex][j][k])> 0.0001) {
if(std::signbit(_gradients[layerIndex][j][k]) == std::signbit(_lastGradients[layerIndex][j][k])) {
newChange+= _gradients[layerIndex][j][k]*_epsilon;
for(std::size_t j=1;j<layerSize;j++) {
for(std::size_t k=1;k<layerSize;k++) {
double update = 0.0;
double inputValue = 0.0;
if(layerIndex==1) {
inputValue=input[k-1];
} else {
inputValue=prevLayer[k].output();
}
if(currentBatchSize == 0) {
gradients[layerIndex][j][k] = update * inputValue;
} else {
gradients[layerIndex][j][k] += update * inputValue;
}
}
/*
float newChange=0;
if(fabs (gradients[layerIndex][j])> 0.0001) {
if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
newChange+= slopes[layerIndex][j]*_epsilon;
if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
newChange += _maxChange * gradients[layerIndex][j];
if(fabs(_gradients[layerIndex][j][k]) > fabs(shrinkFactor * _lastGradients[layerIndex][j][k])) {
newChange += _maxChange * _gradients[layerIndex][j][k];
}else {
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
}
} else {
newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
}
} else {
newChange+= slopes[layerIndex][j]*_epsilon;
newChange+= _lastGradients[layerIndex][j][k]*_epsilon;
}
_lastDeltas[layerIndex][j][k]= newChange;
layer[j].weight(k)+= newChange;
weightChange[layerIndex][j]=newChange;
layer[j].weight(0)+=newChange;
for(std::size_t k=1;k<prevLayerSize;k++) {
if(layerIndex==1) {
layer[j].weight(k)+=newChange*(input[k-1]);
} else {
layer[j].weight(k)+=newChange*(prevLayer[k].output());
}
}
/* This is according to paper?
// delta = _gradients[layerIndex][j][k] / (_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
// delta = std::min(_maxChange,delta);
_lastDeltas[layerIndex][j][k] = delta;
layer[j].weight(k)+= delta;
*/
}
}
}
_lastGradients.swap(_gradients);
}
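
For readers following the branch structure above, here is a self-contained sketch of the per-weight step that updateWeightsAndEndBatch() applies; the free function quickpropStep and its parameter names are hypothetical and simply mirror _gradients, _lastGradients, _lastDeltas, _epsilon and _maxChange, so this is an illustration rather than the library's API.

#include <cmath>

// Hypothetical standalone sketch of one Quickprop weight step (not part of the library).
float quickpropStep(float gradient, float lastGradient, float lastDelta,
                    float epsilon, float maxChange) {
    const float shrinkFactor = maxChange / (maxChange + 1.0f);
    float change = 0.0f;
    if (std::fabs(gradient) > 0.0001f) {
        if (std::signbit(gradient) == std::signbit(lastGradient)) {
            // Slope kept its sign: take a plain gradient-descent step first.
            change += epsilon * gradient;
            if (std::fabs(gradient) > std::fabs(shrinkFactor * lastGradient)) {
                // Slope is not shrinking: limit the step. (Fahlman's paper caps it at
                // maxChange times the previous delta; the code above uses the gradient.)
                change += maxChange * gradient;
            } else {
                // Secant step of the parabola fitted through the two slopes.
                change += gradient / (lastGradient - gradient) * lastDelta;
            }
        } else {
            // Slope changed sign: rely on the secant step alone.
            change += gradient / (lastGradient - gradient) * lastDelta;
        }
    } else {
        // Slope is numerically zero: nudge along the previous slope.
        change += epsilon * lastGradient;
    }
    return change; // stored as the new last delta and added to the weight
}

In the method above this step runs once per weight at the end of a batch; _lastDeltas and the weight are updated with the result, and _lastGradients.swap(_gradients) then rotates the slope history for the next batch.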