quickProp implementation after refactoring
@@ -4,7 +4,7 @@
 #include <cmath>
 
 #include <NeuralNetwork/FeedForward/Network.h>
-#include "BackPropagation.h"
+#include "BatchPropagation.h"
 
 namespace NeuralNetwork {
 namespace Learning {
@@ -12,27 +12,33 @@ namespace NeuralNetwork {
     /** @class QuickPropagation
      * @brief
      */
-    class QuickPropagation : public BackPropagation {
+    class QuickPropagation : public BatchPropagation {
 
         public:
             inline QuickPropagation(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
-                BackPropagation(feedForwardNetwork,correction) {
+                BatchPropagation(feedForwardNetwork,correction) {
             }
 
             virtual ~QuickPropagation() {
             }
 
-        protected:
-
-            float _maxChange=1.75;
-            float _epsilon=0.5;
-
-            virtual inline void resize() override {
-                BackPropagation::resize();
-                _previousSlopes = _slopes;
-            }
-
-            std::vector<std::vector<float>> _previousSlopes ={};
+            void setLearningCoefficient (const float& coefficient) {
+            }
+
+        protected:
+
+            virtual void updateWeightsAndEndBatch() override;
+
+            float _maxChange=1.75;
+
+            virtual inline void resize() override {
+                BatchPropagation::resize();
+                _lastGradients = _gradients;
+                _lastDeltas = _gradients;
+            }
+
+            std::vector<std::vector<std::vector<float>>> _lastDeltas = {};
+            std::vector<std::vector<std::vector<float>>> _lastGradients = {};
     };
 }
 }
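For context, the class implements Fahlman's QuickProp step: fit a parabola through the current and the previous error slope of each weight and jump toward its minimum, with _maxChange (1.75 here) playing the role of the usual growth limit mu that bounds how far one step may exceed the previous one. In the usual notation (S for the batch slope, Delta w for the weight step; neither symbol appears in the code):

    \Delta w_{ij}(t) = \frac{S_{ij}(t)}{S_{ij}(t-1) - S_{ij}(t)} \, \Delta w_{ij}(t-1),
    \qquad |\Delta w_{ij}(t)| \le \mu \, |\Delta w_{ij}(t-1)|, \quad \mu = 1.75

The loop committed below departs from the bare formula in two places: when the slope keeps its sign it also adds an epsilon-scaled gradient term, and when the slope has not shrunk enough it adds _maxChange times the current slope instead of the parabola step.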
@@ -1,5 +1,7 @@
 #include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
 
+#include <NeuralNetwork/Learning/BackPropagation.h>
+
 using namespace NeuralNetwork::ConstructiveAlgorihtms;
 
 float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector <CascadeCorrelation::TrainingPattern> &patterns) {
@@ -1,70 +1,46 @@
 #include <NeuralNetwork/Learning/QuickPropagation.h>
 
-#include <cassert>
-#include <immintrin.h>
-
-void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {
-
-    float shrinkFactor=_maxChange/(_maxChange+1.0);
-
-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer=network[layerIndex];
-        auto &prevLayer=network[layerIndex-1];
-
-        std::size_t prevLayerSize=prevLayer.size();
-        std::size_t layerSize=layer.size();
-
-        for(std::size_t j=1;j<layerSize;j++) {
-            for(std::size_t k=1;k<layerSize;k++) {
-                double update = 0.0;
-
-                double inputValue = 0.0;
-                if(layerIndex==1) {
-                    inputValue=input[k-1];
-                } else {
-                    inputValue=prevLayer[k].output();
-                }
-
-                if(currentBatchSize == 0) {
-                    gradients[layerIndex][j][k] = update * inputValue;
-                } else {
-                    gradients[layerIndex][j][k] += update * inputValue;
-                }
-            }
-            /*
-            float newChange=0;
-
-            if(fabs (gradients[layerIndex][j])> 0.0001) {
-                if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
-                    newChange+= slopes[layerIndex][j]*_epsilon;
-                    if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                        newChange += _maxChange * gradients[layerIndex][j];
-                    }else {
-                        newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                    }
-                } else {
-                    newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                }
-            } else {
-                newChange+= slopes[layerIndex][j]*_epsilon;
-            }
-
-            weightChange[layerIndex][j]=newChange;
-
-            layer[j].weight(0)+=newChange;
-
-            for(std::size_t k=1;k<prevLayerSize;k++) {
-                if(layerIndex==1) {
-                    layer[j].weight(k)+=newChange*(input[k-1]);
-                } else {
-                    layer[j].weight(k)+=newChange*(prevLayer[k].output());
-                }
-            }
-            */
-        }
-    }
-}
+void NeuralNetwork::Learning::QuickPropagation::updateWeightsAndEndBatch() {
+    float shrinkFactor=_maxChange/(_maxChange+1.0f);
+
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];
+
+        std::size_t prevLayerSize = prevLayer.size();
+        std::size_t layerSize = layer.size();
+
+        for(std::size_t j = 1; j < layerSize; j++) {
+            for(std::size_t k = 0; k < prevLayerSize; k++) {
+                float newChange = 0.0f;
+                float _epsilon = 0.9;
+                if(fabs (_gradients[layerIndex][j][k])> 0.0001) {
+                    if(std::signbit(_gradients[layerIndex][j][k]) == std::signbit(_lastGradients[layerIndex][j][k])) {
+                        newChange+= _gradients[layerIndex][j][k]*_epsilon;
+                        if(fabs(_gradients[layerIndex][j][k]) > fabs(shrinkFactor * _lastGradients[layerIndex][j][k])) {
+                            newChange += _maxChange * _gradients[layerIndex][j][k];
+                        }else {
+                            newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                        }
+                    } else {
+                        newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                    }
+                } else {
+                    newChange+= _lastGradients[layerIndex][j][k]*_epsilon;
+                }
+
+                _lastDeltas[layerIndex][j][k]= newChange;
+                layer[j].weight(k)+= newChange;
+
+                /* This is according to paper?
+                // delta = _gradients[layerIndex][j][k] / (_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                // delta = std::min(_maxChange,delta);
+                _lastDeltas[layerIndex][j][k] = delta;
+                layer[j].weight(k)+= delta;
+                */
+            }
+        }
+    }
+
+    _lastGradients.swap(_gradients);
+}
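To read the per-weight branch structure without the surrounding loops, here is a minimal, self-contained sketch of the same step computation. It is not part of the library: the helper name quickPropStep and its defaults are made up for illustration and simply mirror the branches of updateWeightsAndEndBatch() above (epsilon = 0.9, growth limit _maxChange = 1.75, 1e-4 slope cutoff).

    #include <cmath>

    // Standalone restatement of the per-weight QuickProp step used above.
    // gradient     : slope accumulated over the current batch
    // lastGradient : slope kept from the previous batch
    // lastDelta    : weight change applied in the previous batch
    float quickPropStep(float gradient, float lastGradient, float lastDelta,
                        float epsilon = 0.9f, float maxChange = 1.75f) {
        const float shrinkFactor = maxChange / (maxChange + 1.0f);
        float change = 0.0f;

        if (std::fabs(gradient) > 0.0001f) {
            if (std::signbit(gradient) == std::signbit(lastGradient)) {
                // Slope kept its sign: start with the epsilon-scaled gradient term ...
                change += gradient * epsilon;
                if (std::fabs(gradient) > std::fabs(shrinkFactor * lastGradient)) {
                    // ... slope did not shrink enough: add the bounded step maxChange * gradient
                    change += maxChange * gradient;
                } else {
                    // ... otherwise add the parabola-minimum (secant) step
                    change += gradient / (lastGradient - gradient) * lastDelta;
                }
            } else {
                // Slope changed sign: only the parabola-minimum step
                change += gradient / (lastGradient - gradient) * lastDelta;
            }
        } else {
            // Slope is effectively zero: reuse the previous slope scaled by epsilon
            change += lastGradient * epsilon;
        }
        return change;
    }

With such a helper, the body of the inner loop would reduce to computing newChange = quickPropStep(...), storing it in _lastDeltas[layerIndex][j][k] and adding it to layer[j].weight(k), followed by the same _lastGradients.swap(_gradients) at the end of the batch.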