quickProp implementation after refactoring
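QuickPropagation is moved from the BackPropagation base onto BatchPropagation: the weight update now happens once per batch in updateWeightsAndEndBatch(), driven by the per-weight batch gradients in _gradients, with _lastGradients and _lastDeltas kept for the next batch in place of the old per-neuron _slopes/_previousSlopes bookkeeping.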
@@ -4,7 +4,7 @@
 #include <cmath>
 
 #include <NeuralNetwork/FeedForward/Network.h>
-#include "BackPropagation.h"
+#include "BatchPropagation.h"
 
 namespace NeuralNetwork {
 namespace Learning {
@@ -12,27 +12,33 @@ namespace NeuralNetwork {
 /** @class QuickPropagation
  * @brief
  */
-class QuickPropagation : public BackPropagation {
+class QuickPropagation : public BatchPropagation {
 
 public:
     inline QuickPropagation(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
-        BackPropagation(feedForwardNetwork,correction) {
+        BatchPropagation(feedForwardNetwork,correction) {
     }
 
     virtual ~QuickPropagation() {
     }
 
-protected:
-
-    float _maxChange=1.75;
-    float _epsilon=0.5;
-
-    virtual inline void resize() override {
-        BackPropagation::resize();
-        _previousSlopes = _slopes;
+    void setLearningCoefficient (const float& coefficient) {
     }
 
-    std::vector<std::vector<float>> _previousSlopes ={};
+protected:
+
+    virtual void updateWeightsAndEndBatch() override;
+
+    float _maxChange=1.75;
+
+    virtual inline void resize() override {
+        BatchPropagation::resize();
+        _lastGradients = _gradients;
+        _lastDeltas = _gradients;
+    }
+
+    std::vector<std::vector<std::vector<float>>> _lastDeltas = {};
+    std::vector<std::vector<std::vector<float>>> _lastGradients = {};
 };
 }
 }
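For context, the rule the class implements is Fahlman's QuickProp step: modelling the error curve of each weight as a parabola through the last two gradients, the weight change is Δw(t) = S(t) / (S(t−1) − S(t)) · Δw(t−1), where S is the gradient for that weight. The maximum growth factor _maxChange (typically 1.75) bounds how much a step may grow relative to the previous one, via the shrink factor _maxChange / (_maxChange + 1), and _epsilon adds an ordinary gradient-descent term while the gradient keeps its sign.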
@@ -1,5 +1,7 @@
 #include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
 
+#include <NeuralNetwork/Learning/BackPropagation.h>
+
 using namespace NeuralNetwork::ConstructiveAlgorihtms;
 
 float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector <CascadeCorrelation::TrainingPattern> &patterns) {
@@ -1,70 +1,46 @@
 #include <NeuralNetwork/Learning/QuickPropagation.h>
 
 #include <cassert>
 #include <immintrin.h>
 
-void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {
-    float shrinkFactor=_maxChange/(_maxChange+1.0);
-
-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer=network[layerIndex];
-        auto &prevLayer=network[layerIndex-1];
-
-        std::size_t prevLayerSize=prevLayer.size();
-        std::size_t layerSize=layer.size();
-
-        for(std::size_t j=1;j<layerSize;j++) {
-            for(std::size_t k=1;k<layerSize;k++) {
-                double update = 0.0;
-
-                double inputValue = 0.0;
-                if(layerIndex==1) {
-                    inputValue=input[k-1];
-                } else {
-                    inputValue=prevLayer[k].output();
-                }
-
-                if(currentBatchSize == 0) {
-                    gradients[layerIndex][j][k] = update * inputValue;
-                } else {
-                    gradients[layerIndex][j][k] += update * inputValue;
-                }
-            }
-            /*
-            float newChange=0;
-
-            if(fabs (gradients[layerIndex][j])> 0.0001) {
-                if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
-                    newChange+= slopes[layerIndex][j]*_epsilon;
-
-                    if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                        newChange += _maxChange * gradients[layerIndex][j];
-                    }else {
-                        newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                    }
-                } else {
-                    newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                }
-            } else {
-                newChange+= slopes[layerIndex][j]*_epsilon;
-            }
-
-            weightChange[layerIndex][j]=newChange;
-
-            layer[j].weight(0)+=newChange;
-
-            for(std::size_t k=1;k<prevLayerSize;k++) {
-                if(layerIndex==1) {
-                    layer[j].weight(k)+=newChange*(input[k-1]);
-                } else {
-                    layer[j].weight(k)+=newChange*(prevLayer[k].output());
-                }
-            }
-            */
-        }
-    }
-}
+void NeuralNetwork::Learning::QuickPropagation::updateWeightsAndEndBatch() {
+    float shrinkFactor=_maxChange/(_maxChange+1.0f);
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];
+        std::size_t prevLayerSize = prevLayer.size();
+        std::size_t layerSize = layer.size();
+        for(std::size_t j = 1; j < layerSize; j++) {
+            for(std::size_t k = 0; k < prevLayerSize; k++) {
+                float newChange = 0.0f;
+                float _epsilon = 0.9;
+                if(fabs (_gradients[layerIndex][j][k])> 0.0001) {
+                    if(std::signbit(_gradients[layerIndex][j][k]) == std::signbit(_lastGradients[layerIndex][j][k])) {
+                        newChange+= _gradients[layerIndex][j][k]*_epsilon;
+                        if(fabs(_gradients[layerIndex][j][k]) > fabs(shrinkFactor * _lastGradients[layerIndex][j][k])) {
+                            newChange += _maxChange * _gradients[layerIndex][j][k];
+                        }else {
+                            newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                        }
+                    } else {
+                        newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                    }
+                } else {
+                    newChange+= _lastGradients[layerIndex][j][k]*_epsilon;
+                }
+                _lastDeltas[layerIndex][j][k]= newChange;
+                layer[j].weight(k)+= newChange;
+
+                /* This is according to paper?
+                // delta = _gradients[layerIndex][j][k] / (_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                // delta = std::min(_maxChange,delta);
+                _lastDeltas[layerIndex][j][k] = delta;
+                layer[j].weight(k)+= delta;
+                */
+            }
+        }
+    }
+    _lastGradients.swap(_gradients);
+}
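Read on its own, the per-weight step that the new updateWeightsAndEndBatch() applies amounts to the sketch below; quickPropStep is a hypothetical helper named here only for illustration, with epsilon and maxChange standing in for the local _epsilon and the member _maxChange:

    #include <cmath>

    // Sketch of one QuickProp weight step, mirroring the logic above.
    // gradient:     accumulated gradient for this weight from the current batch
    // lastGradient: gradient from the previous batch
    // lastDelta:    weight change applied after the previous batch
    float quickPropStep(float gradient, float lastGradient, float lastDelta,
                        float epsilon = 0.9f, float maxChange = 1.75f) {
        const float shrinkFactor = maxChange / (maxChange + 1.0f);
        float newChange = 0.0f;
        if (std::fabs(gradient) > 0.0001f) {
            if (std::signbit(gradient) == std::signbit(lastGradient)) {
                // Gradient kept its sign: take a plain gradient-descent step ...
                newChange += epsilon * gradient;
                if (std::fabs(gradient) > std::fabs(shrinkFactor * lastGradient)) {
                    // ... plus a growth-limited step when the gradient barely shrank.
                    newChange += maxChange * gradient;
                } else {
                    // ... plus the secant step towards the minimum of the fitted parabola.
                    newChange += gradient / (lastGradient - gradient) * lastDelta;
                }
            } else {
                // Sign flipped: only the secant step.
                newChange += gradient / (lastGradient - gradient) * lastDelta;
            }
        } else {
            // Gradient is nearly zero: nudge along the previous gradient.
            newChange += epsilon * lastGradient;
        }
        return newChange;  // caller adds this to the weight and stores it as lastDelta
    }

The commented-out block in the diff keeps the paper-style alternative, where the secant step is computed on its own and then clamped with std::min(_maxChange, delta).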