From 9fbcb727a2d34ebfcea9c1f48034eb1b64ff7ab6 Mon Sep 17 00:00:00 2001
From: Shin
Date: Wed, 7 Sep 2016 22:26:50 +0200
Subject: [PATCH] backprop: momentums + decay, quickprop: renaming

---
 .../NeuralNetwork/Learning/BackPropagation.h  | 38 ++++++++++++++++++-
 .../NeuralNetwork/Learning/QuickPropagation.h | 26 ++++---------
 .../Learning/BackPropagation.cpp              | 18 ++++++---
 .../Learning/QuickPropagation.cpp             | 12 +++---
 4 files changed, 62 insertions(+), 32 deletions(-)

diff --git a/include/NeuralNetwork/Learning/BackPropagation.h b/include/NeuralNetwork/Learning/BackPropagation.h
index cb61d42..c880519 100644
--- a/include/NeuralNetwork/Learning/BackPropagation.h
+++ b/include/NeuralNetwork/Learning/BackPropagation.h
@@ -31,6 +31,22 @@ namespace Learning {
 
 			inline virtual void setLearningCoefficient (const float& coefficient) { learningCoefficient=coefficient; }
 
+			float getMomentumWeight() const {
+				return momentumWeight;
+			}
+
+			void setMomentumWeight(const float& m) {
+				momentumWeight=m;
+			}
+
+			float getWeightDecay() const {
+				return weightDecay;
+			}
+
+			void setWeightDecay(const float& wd) {
+				weightDecay=wd;
+			}
+
 		protected:
 
 			virtual inline void resize() {
@@ -41,11 +57,25 @@ namespace Learning {
 					if(slopes[i].size()!=network[i].size())
 						slopes[i].resize(network[i].size());
 				}
+
+				if(lastDeltas.size()!=network.size())
+					lastDeltas.resize(network.size());
+
+				for(std::size_t i=0; i < network.size(); i++) {
+					if(lastDeltas[i].size()!=network[i].size()) {
+						lastDeltas[i].resize(network[i].size());
+
+						for(std::size_t j = 0; j < lastDeltas[i].size(); j++) {
+							lastDeltas[i][j] = 0.0;
+						}
+					}
+				}
+				deltas= lastDeltas;
 			}
 
 			virtual void updateWeights(const std::vector<float> &input);
-			virtual void computeDeltas(const std::vector<float> &expectation);
+			virtual void computeSlopes(const std::vector<float> &expectation);
 
 			FeedForward::Network &network;
@@ -53,7 +83,13 @@ namespace Learning {
 
 			float learningCoefficient;
 
+			float momentumWeight = 0.0;
+
+			float weightDecay = 0.0;
+
 			std::vector<std::vector<float>> slopes;
+			std::vector<std::vector<float>> deltas;
+			std::vector<std::vector<float>> lastDeltas;
 	};
 	}
 }
\ No newline at end of file
diff --git a/include/NeuralNetwork/Learning/QuickPropagation.h b/include/NeuralNetwork/Learning/QuickPropagation.h
index 71edec9..48f4f98 100644
--- a/include/NeuralNetwork/Learning/QuickPropagation.h
+++ b/include/NeuralNetwork/Learning/QuickPropagation.h
@@ -36,36 +36,24 @@ namespace NeuralNetwork {
 					slopes[i].resize(network[i].size());
 			}
 
-			if(previousSlopes.size()!=network.size())
-				previousSlopes.resize(network.size());
+			if(deltas.size()!=network.size())
+				deltas.resize(network.size());
 
 			for(std::size_t i=0; i < network.size(); i++) {
-				if(previousSlopes[i].size()!=network[i].size())
-					previousSlopes[i].resize(network[i].size());
+				if(deltas[i].size()!=network[i].size())
+					deltas[i].resize(network[i].size());
 
-				for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
-					previousSlopes[i][j]=1.0;
+				for(std::size_t j=0; j < deltas[i].size(); j++) {
+					deltas[i][j]=1.0;
 				}
 			}
-
-			if(lastWeightChange.size()!=network.size())
-				lastWeightChange.resize(network.size());
-
-			for(std::size_t i=0; i < network.size(); i++) {
-				if(lastWeightChange[i].size()!=network[i].size())
-					lastWeightChange[i].resize(network[i].size());
-
-				for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
-					lastWeightChange[i][j]=1.0;
-				}
-			}
-			weightChange= lastWeightChange;
+			weightChange= deltas;
 		}
 
 		virtual void updateWeights(const std::vector<float> &input) override;
 
 		std::vector<std::vector<float>> previousSlopes ={};
-		std::vector<std::vector<float>> lastWeightChange ={};
+		std::vector<std::vector<float>> deltas ={};
		std::vector<std::vector<float>> weightChange ={};
 	};
 }
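Note on the headers above: the new momentumWeight/weightDecay members are plain knobs; nothing in BackPropagation.h itself wires them into training. A minimal usage sketch follows, assuming BackPropagation is constructed from the FeedForward::Network it trains (suggested by the `network` reference member, but not shown in this patch), that FeedForward::Network lives under the NeuralNetwork namespace, and that teach(input, expectation) runs one forward/backward pass as in BackPropagation.cpp below. The train() function and the chosen constants are illustrative, not part of the library:

#include <cstddef>
#include <vector>
#include "NeuralNetwork/Learning/BackPropagation.h"

// Hypothetical training loop exercising the new momentum/decay setters.
// The BackPropagation(net) constructor is an assumption.
void train(NeuralNetwork::FeedForward::Network &net,
           const std::vector<std::vector<float>> &inputs,
           const std::vector<std::vector<float>> &expectations) {
	NeuralNetwork::Learning::BackPropagation teacher(net);
	teacher.setLearningCoefficient(0.3f);
	teacher.setMomentumWeight(0.9f);   // reuse 90% of the previous delta
	teacher.setWeightDecay(0.0001f);   // shrink each weight slightly per step
	for(std::size_t epoch = 0; epoch < 1000; epoch++)
		for(std::size_t p = 0; p < inputs.size(); p++)
			teacher.teach(inputs[p], expectations[p]);
}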
diff --git a/src/NeuralNetwork/Learning/BackPropagation.cpp b/src/NeuralNetwork/Learning/BackPropagation.cpp
index 616de16..9801169 100644
--- a/src/NeuralNetwork/Learning/BackPropagation.cpp
+++ b/src/NeuralNetwork/Learning/BackPropagation.cpp
@@ -9,9 +9,11 @@ void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
 
 	resize();
 
-	computeDeltas(expectation);
+	computeSlopes(expectation);
 	updateWeights(input);
+
+	std::swap(deltas,lastDeltas);
 }
 
 
@@ -28,21 +30,25 @@ void NeuralNetwork::Learning::BackPropagation::updateWeights(const std::vector<float> &input) {
-void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &expectation) {
+void NeuralNetwork::Learning::BackPropagation::computeSlopes(const std::vector<float> &expectation) {
 	auto& outputLayer=network[network.size()-1];
 
 	for(std::size_t j=1;j<outputLayer.size();j++) {
diff --git a/src/NeuralNetwork/Learning/QuickPropagation.cpp b/src/NeuralNetwork/Learning/QuickPropagation.cpp
--- a/src/NeuralNetwork/Learning/QuickPropagation.cpp
+++ b/src/NeuralNetwork/Learning/QuickPropagation.cpp
-			if(fabs (lastWeightChange[layerIndex][j])> 0.0001) {
-				if(std::signbit(lastWeightChange[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
+			if(fabs (deltas[layerIndex][j])> 0.0001) {
+				if(std::signbit(deltas[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
 					newChange+= slopes[layerIndex][j]*_epsilon;
 
 					if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-						newChange += _maxChange * lastWeightChange[layerIndex][j];
+						newChange += _maxChange * deltas[layerIndex][j];
 					}else {
-						newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
+						newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
 					}
 				} else {
-					newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
+					newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
 				}
 			} else {
 				newChange+= slopes[layerIndex][j]*_epsilon;
@@ -49,5 +49,5 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<float> &input) {
 	}
 
 	slopes.swap(previousSlopes);
-	weightChange.swap(lastWeightChange);
+	weightChange.swap(deltas);
 }
\ No newline at end of file
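Note: the hunk that actually applies the new members inside BackPropagation::updateWeights is not shown in full above. For orientation, here is a sketch of the conventional momentum-plus-weight-decay step that the momentumWeight/weightDecay members and the deltas/lastDeltas swap in teach() point to. This is an illustration of the standard rule, not the patch's actual code; the function and its parameter names are mine:

// Per-weight update with momentum and weight decay (hedged sketch).
// `slope` is the error slope accumulated by computeSlopes(), `output` the
// activation feeding this weight, `lastDelta` the previous step's change
// (read from lastDeltas; the new delta is stored in deltas and swapped
// into lastDeltas by teach()). Returns the decayed, updated weight.
float updateOneWeight(float weight, float slope, float output,
                      float lastDelta, float learningCoefficient,
                      float momentumWeight, float weightDecay,
                      float &newDelta) {
	newDelta = learningCoefficient * slope * output   // plain gradient step
	         + momentumWeight * lastDelta;            // momentum term
	return weight + newDelta - weightDecay * weight;  // L2-style decay
}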