quickProp implementation after refactoring

2016-11-01 21:40:00 +01:00
parent 173cfc9789
commit 8b2a4e89b3
3 changed files with 52 additions and 68 deletions

View File

@@ -4,7 +4,7 @@
 #include <cmath>
 #include <NeuralNetwork/FeedForward/Network.h>
-#include "BackPropagation.h"
+#include "BatchPropagation.h"

 namespace NeuralNetwork {
 namespace Learning {
@@ -12,27 +12,33 @@ namespace NeuralNetwork {
     /** @class QuickPropagation
      * @brief
      */
-    class QuickPropagation : public BackPropagation {
+    class QuickPropagation : public BatchPropagation {
     public:
         inline QuickPropagation(FeedForward::Network &feedForwardNetwork, std::shared_ptr<CorrectionFunction::CorrectionFunction> correction = std::make_shared<CorrectionFunction::Linear>()):
-            BackPropagation(feedForwardNetwork,correction) {
+            BatchPropagation(feedForwardNetwork,correction) {
         }

         virtual ~QuickPropagation() {
         }

+        void setLearningCoefficient (const float& coefficient) {
+        }
+
     protected:
+        virtual void updateWeightsAndEndBatch() override;
+
         float _maxChange=1.75;
-        float _epsilon=0.5;

         virtual inline void resize() override {
-            BackPropagation::resize();
-            _previousSlopes = _slopes;
+            BatchPropagation::resize();
+            _lastGradients = _gradients;
+            _lastDeltas = _gradients;
         }

-        std::vector<std::vector<float>> _previousSlopes ={};
+        std::vector<std::vector<std::vector<float>>> _lastDeltas = {};
+        std::vector<std::vector<std::vector<float>>> _lastGradients = {};
     };
 }
 }
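
For orientation, the buffers introduced here are indexed as [layer][neuron][incoming weight]; resize() simply mirrors the shape that BatchPropagation gives _gradients into the two new history buffers. A minimal, standalone sketch of that layout, assuming only the layer widths of a feed-forward network (the helper name and widths are illustrative, not taken from this repository):

#include <cstddef>
#include <vector>

// Hypothetical helper, for illustration only: build a zero-initialised
// [layer][neuron][incoming weight] buffer from the layer widths of a
// feed-forward network. The outer index 0 (the input layer) stays empty
// because its neurons have no incoming weights.
std::vector<std::vector<std::vector<float>>>
makeWeightShapedBuffer(const std::vector<std::size_t> &layerSizes) {
    std::vector<std::vector<std::vector<float>>> buffer(layerSizes.size());
    for (std::size_t layer = 1; layer < layerSizes.size(); ++layer) {
        buffer[layer].assign(layerSizes[layer],
                             std::vector<float>(layerSizes[layer - 1], 0.0f));
    }
    return buffer;
}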

View File

@@ -1,5 +1,7 @@
 #include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
+#include <NeuralNetwork/Learning/BackPropagation.h>

 using namespace NeuralNetwork::ConstructiveAlgorihtms;

 float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector <CascadeCorrelation::TrainingPattern> &patterns) {

View File

@@ -1,70 +1,46 @@
 #include <NeuralNetwork/Learning/QuickPropagation.h>
-#include <cassert>
-#include <immintrin.h>

-void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {
-    float shrinkFactor=_maxChange/(_maxChange+1.0);
+void NeuralNetwork::Learning::QuickPropagation::updateWeightsAndEndBatch() {
+    float shrinkFactor=_maxChange/(_maxChange+1.0f);

-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer=network[layerIndex];
-        auto &prevLayer=network[layerIndex-1];
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];

         std::size_t prevLayerSize = prevLayer.size();
         std::size_t layerSize = layer.size();

         for(std::size_t j = 1; j < layerSize; j++) {
-            for(std::size_t k=1;k<layerSize;k++) {
-                double update = 0.0;
-                double inputValue = 0.0;
-                if(layerIndex==1) {
-                    inputValue=input[k-1];
-                }else {
-                    inputValue=prevLayer[k].output();
-                }
-                if(currentBatchSize == 0) {
-                    gradients[layerIndex][j][k] = update * inputValue;
-                } else {
-                    gradients[layerIndex][j][k] += update * inputValue;
-                }
-            }
-            /*
-            float newChange=0;
-            if(fabs (gradients[layerIndex][j])> 0.0001) {
-                if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
-                    newChange+= slopes[layerIndex][j]*_epsilon;
-                    if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                        newChange += _maxChange * gradients[layerIndex][j];
-                    }else {
-                        newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                    }
-                } else {
-                    newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
-                }
-            } else {
-                newChange+= slopes[layerIndex][j]*_epsilon;
-            }
-
-            weightChange[layerIndex][j]=newChange;
-
-            layer[j].weight(0)+=newChange;
-
-            for(std::size_t k=1;k<prevLayerSize;k++) {
-                if(layerIndex==1) {
-                    layer[j].weight(k)+=newChange*(input[k-1]);
-                } else {
-                    layer[j].weight(k)+=newChange*(prevLayer[k].output());
-                }
-            }
-            */
+            for(std::size_t k = 0; k < prevLayerSize; k++) {
+                float newChange = 0.0f;
+                float _epsilon = 0.9;
+                if(fabs (_gradients[layerIndex][j][k])> 0.0001) {
+                    if(std::signbit(_gradients[layerIndex][j][k]) == std::signbit(_lastGradients[layerIndex][j][k])) {
+                        newChange+= _gradients[layerIndex][j][k]*_epsilon;
+                        if(fabs(_gradients[layerIndex][j][k]) > fabs(shrinkFactor * _lastGradients[layerIndex][j][k])) {
+                            newChange += _maxChange * _gradients[layerIndex][j][k];
+                        }else {
+                            newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                        }
+                    } else {
+                        newChange+=_gradients[layerIndex][j][k]/(_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                    }
+                } else {
+                    newChange+= _lastGradients[layerIndex][j][k]*_epsilon;
+                }
+
+                _lastDeltas[layerIndex][j][k]= newChange;
+                layer[j].weight(k)+= newChange;
+
+                /* This is according to paper?
+                // delta = _gradients[layerIndex][j][k] / (_lastGradients[layerIndex][j][k]-_gradients[layerIndex][j][k]) * _lastDeltas[layerIndex][j][k];
+                // delta = std::min(_maxChange,delta);
+                // _lastDeltas[layerIndex][j][k] = delta;
+                // layer[j].weight(k)+= delta;
+                */
+            }
         }
     }
+    _lastGradients.swap(_gradients);
 }
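
For reference, the branch structure above (and the "according to paper" comment) follows Fahlman's QuickProp rule: the quadratic step delta = S(t) / (S(t-1) - S(t)) * delta(t-1) is taken when the slope has shrunk enough relative to the previous one, otherwise the step is capped by the growth factor mu, with a plain gradient term as the fallback. A minimal, standalone sketch of that per-weight rule, with names and default values chosen only for illustration (this is not the repository's API):

#include <cmath>

// One QuickProp step for a single weight (after Fahlman, 1988).
// slope     - current accumulated slope, taken here as -dE/dw
// prevSlope - slope from the previous batch
// prevDelta - weight change applied in the previous batch
// mu        - maximum growth factor (e.g. 1.75)
// epsilon   - fallback gradient-descent rate
float quickpropStep(float slope, float prevSlope, float prevDelta,
                    float mu = 1.75f, float epsilon = 0.5f) {
    const float shrink = mu / (1.0f + mu);
    float delta = 0.0f;

    if (prevDelta > 0.0f) {
        if (slope > 0.0f)
            delta += epsilon * slope;               // keep a linear term while still descending
        if (slope > shrink * prevSlope)
            delta += mu * prevDelta;                // slope barely shrank: take the capped step
        else
            delta += slope / (prevSlope - slope) * prevDelta;  // jump to the parabola's minimum
    } else if (prevDelta < 0.0f) {
        if (slope < 0.0f)
            delta += epsilon * slope;
        if (slope < shrink * prevSlope)
            delta += mu * prevDelta;
        else
            delta += slope / (prevSlope - slope) * prevDelta;
    } else {
        delta = epsilon * slope;                    // first step or stalled weight: plain gradient descent
    }
    return delta;
}

The hunk above gates on sign agreement of the current and previous gradients rather than on the sign of the previous delta, which is a variation of the same idea.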