refactored propagation

2016-10-31 15:03:27 +01:00
parent 8749b3eb03
commit 77b38dec65
19 changed files with 285 additions and 548 deletions
--- a/src/NeuralNetwork/Learning/BackPropagation.cpp
+++ b/src/NeuralNetwork/Learning/BackPropagation.cpp
@@ -1,89 +1,23 @@
 #include <NeuralNetwork/Learning/BackPropagation.h>

-#include <cassert>
-#include <immintrin.h>
-
-void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
-	network.computeOutput(input);
-	resize();
-	computeSlopes(expectation);
-
-	computeDeltas(input);
-	if(++currentBatchSize >= batchSize) {
-		updateWeights();
-		endBatch();
-		currentBatchSize=0;
-	}
-}
-
-void NeuralNetwork::Learning::BackPropagation::computeSlopes(const std::vector<float> &expectation) {
-	auto& outputLayer=network[network.size()-1];
-	for(std::size_t j=1;j<outputLayer.size();j++) {
-		auto& neuron = outputLayer[j];
-		slopes[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
-									neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
-	}
-
-	for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
-		auto &layer=network[layerIndex];
-
-		for(std::size_t j=1;j<layer.size();j++) {
-			float deltasWeight = 0;
-
-			for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
-				deltasWeight+=slopes[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
-			}
-
-			slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
-		}
-	}
-}
-
-void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &input) {
-	for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-		auto &layer=network[layerIndex];
-		auto &prevLayer=network[layerIndex-1];
-
-		std::size_t prevLayerSize=prevLayer.size();
-		std::size_t layerSize=layer.size();
-
-		for(std::size_t j=1;j<layerSize;j++)  {
-			float update = slopes[layerIndex][j];
-			for(std::size_t k=0;k<prevLayerSize;k++) {
-				float inputValue = 0.0;
-				if(layerIndex==1 && k!=0) {
-					inputValue = input[k-1];
-				} else {
-					inputValue= prevLayer[k].output();
-				}
-				if(currentBatchSize == 0) {
-					deltas[layerIndex][j][k] = update * inputValue;
-				} else {
-					deltas[layerIndex][j][k] += update * inputValue;
-				}
-			}
-		}
-	}
-}
-
-void NeuralNetwork::Learning::BackPropagation::updateWeights() {
+void NeuralNetwork::Learning::BackPropagation::updateWeightsAndEndBatch() {

 	bool enableMoments = momentumWeight > 0.0;

-	for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-		auto &layer = network[layerIndex];
-		auto &prevLayer = network[layerIndex - 1];
+	for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+		auto &layer = _network[layerIndex];
+		auto &prevLayer = _network[layerIndex - 1];

 		std::size_t prevLayerSize = prevLayer.size();
 		std::size_t layerSize = layer.size();

 		for(std::size_t j = 1; j < layerSize; j++) {
 			for(std::size_t k = 0; k < prevLayerSize; k++) {
-				float delta = deltas[layerIndex][j][k]*learningCoefficient - weightDecay * layer[j].weight(k);
+				float delta = _gradients[layerIndex][j][k]*learningCoefficient - weightDecay * layer[j].weight(k);

 				if(enableMoments) {
-					delta += momentumWeight * lastDeltas[layerIndex][j][k];
-					lastDeltas[layerIndex][j][k]=delta;
+					delta += momentumWeight * _lastDeltas[layerIndex][j][k];
+					_lastDeltas[layerIndex][j][k]=delta;
 				}

 				layer[j].weight(k)+= delta;
--- a/src/NeuralNetwork/Learning/BatchPropagation.cpp
+++ b/src/NeuralNetwork/Learning/BatchPropagation.cpp
@@ -0,0 +1,92 @@
+#include <NeuralNetwork/Learning/BatchPropagation.h>
+
+void NeuralNetwork::Learning::BatchPropagation::teach(const std::vector<float> &input, const std::vector<float> &expectation) { // Feeds one training sample (input, expected output) into the current batch; once _batchSize samples have been seen the batch is finished via finishTeaching().
+	_network.computeOutput(input); // evaluate the network on this sample before any slopes are computed
+	if(!init) { // buffers are sized only on the first call
+		resize();
+		init = true; // NOTE(review): nothing visible here resets init, so the buffers are not re-sized if the network's shape changes later - confirm that is intended
+	}
+
+	computeSlopes(expectation); // fills _slopes from the expected output
+
+	computeDeltas(input); // still sees the pre-increment _currentBatchSize, so the first sample of a batch overwrites _gradients
+	if(++_currentBatchSize >= _batchSize) { // count this sample; a full batch triggers the weight update
+		finishTeaching();
+	}
+}
+
+void NeuralNetwork::Learning::BatchPropagation::finishTeaching() { // Ends the current batch: delegates the weight update and resets the sample counter.
+	updateWeightsAndEndBatch(); // defined by the concrete learners in this commit (BackPropagation, RProp); presumably virtual - confirm in the header
+	_currentBatchSize=0; // with the counter at 0 the next computeDeltas() call overwrites _gradients instead of accumulating
+}
+
+void NeuralNetwork::Learning::BatchPropagation::computeSlopes(const std::vector<float> &expectation) { // Fills _slopes for the last layer from the expectation, then for each earlier layer down to index 1 from the slopes of the layer after it.
+	const auto& outputLayer=_network[_network.size()-1]; // last layer of the network
+	for(std::size_t j=1;j<outputLayer.size();j++) { // index 0 is skipped (presumably the bias neuron - TODO confirm)
+		const auto& neuron = outputLayer[j];
+		_slopes[_network.size()-1][j]=_correctionFunction->operator()( expectation[j-1], neuron.output())* // expectation is indexed j-1 because neuron index 0 is skipped
+									neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
+	}
+
+	for(int layerIndex=static_cast<int>(_network.size()-2);layerIndex>0;layerIndex--) { // walk backwards from the layer before the last; layer 0 gets no slopes
+		auto &layer=_network[layerIndex];
+
+		for(std::size_t j=1;j<layer.size();j++) { // again skipping index 0
+			float deltasWeight = 0; // weighted sum of the next layer's slopes for neuron j
+
+			for(std::size_t k=1;k<_network[layerIndex+1].size();k++) {
+				deltasWeight+=_slopes[layerIndex+1][k]* _network[layerIndex+1][k].weight(j); // slope of next-layer neuron k times its weight(j)
+			}
+
+			_slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output()); // scale by this neuron's activation derivative
+		}
+	}
+}
+
+void NeuralNetwork::Learning::BatchPropagation::computeDeltas(const std::vector<float> &input) { // Stores slope * input value for every weight in _gradients: overwritten on the first sample of a batch, accumulated on the following ones.
+	for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) { // starts at 1 because each layer is paired with the layer before it
+		auto &layer=_network[layerIndex];
+		auto &prevLayer=_network[layerIndex-1];
+
+		std::size_t prevLayerSize=prevLayer.size();
+		std::size_t layerSize=layer.size();
+
+		for(std::size_t j=1;j<layerSize;j++)  { // neuron index 0 of the current layer is skipped
+			float update = _slopes[layerIndex][j]; // slope computed by computeSlopes() for this neuron
+			for(std::size_t k=0;k<prevLayerSize;k++) { // unlike j, k includes index 0 of the previous layer
+				float inputValue = 0.0;
+				if(layerIndex==1 && k!=0) { // layer 1 reads the raw input vector, shifted by one because k==0 is handled in the else branch
+					inputValue = input[k-1];
+				} else { // every other layer, and k==0 of layer 1, use the previous layer neuron's output
+					inputValue= prevLayer[k].output();
+				}
+				if(_currentBatchSize == 0) { // first sample of the batch: overwrite whatever the last batch left behind
+					_gradients[layerIndex][j][k] = update * inputValue;
+				} else { // later samples: add to the running sum
+					_gradients[layerIndex][j][k] += update * inputValue;
+				}
+			}
+		}
+	}
+}
+
+void NeuralNetwork::Learning::BatchPropagation::resize() { // Sizes _slopes and _gradients to match the current shape of _network and zeroes the gradient entries.
+	_slopes.resize(_network.size()); // one slope row per layer
+
+	for(std::size_t i=0; i < _network.size(); i++) {
+		_slopes[i].resize(_network[i].size()); // one slope per neuron of layer i
+	}
+
+	_gradients.resize(_network.size()); // one gradient table per layer
+
+	for(std::size_t i = 0; i < _network.size(); i++) {
+		_gradients[i].resize(_network[i].size()); // one row per neuron of layer i
+		if(i > 0) { // layer 0 has no previous layer, so its rows are left empty
+			for(std::size_t j = 0; j < _gradients[i].size(); j++) {
+				_gradients[i][j].resize(_network[i - 1].size()); // one entry per neuron of the previous layer
+				std::fill(_gradients[i][j].begin(), _gradients[i][j].end(), 0.0); // zero every entry, including any that existed before the resize
+			}
+		}
+	}
+
+}
--- a/src/NeuralNetwork/Learning/QuickPropagation.cpp
+++ b/src/NeuralNetwork/Learning/QuickPropagation.cpp
@@ -3,7 +3,7 @@
 #include <cassert>
 #include <immintrin.h>

-void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<float> &input) {
+void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {

 	float shrinkFactor=_maxChange/(_maxChange+1.0);

@@ -15,20 +15,39 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
 		std::size_t layerSize=layer.size();

 		for(std::size_t j=1;j<layerSize;j++)  {
+			for(std::size_t k=1;k<layerSize;k++) {
+				double update = 0.0;

+
+
+				double inputValue = 0.0;
+				if(layerIndex==1) {
+					inputValue=input[k-1];
+				} else {
+					inputValue=prevLayer[k].output();
+				}
+
+				if(currentBatchSize == 0) {
+					gradients[layerIndex][j][k] = update * inputValue;
+				} else {
+					gradients[layerIndex][j][k] += update * inputValue;
+				}
+
+			}
+/*
 			float newChange=0;

-			if(fabs (deltas[layerIndex][j])> 0.0001) {
-				if(std::signbit(deltas[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
+			if(fabs (gradients[layerIndex][j])> 0.0001) {
+				if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
 					newChange+= slopes[layerIndex][j]*_epsilon;

 					if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-						newChange += _maxChange * deltas[layerIndex][j];
+						newChange += _maxChange * gradients[layerIndex][j];
 					}else {
-						newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
+						newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
 					}
 				} else {
-					newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
+					newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
 				}
 			} else {
 				newChange+= slopes[layerIndex][j]*_epsilon;
@@ -45,9 +64,7 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
 					layer[j].weight(k)+=newChange*(prevLayer[k].output());
 				}
 			}
+			*/
 		}
 	}
-
-	slopes.swap(previousSlopes);
-	weightChange.swap(deltas);
 }
--- a/src/NeuralNetwork/Learning/RProp.cpp
+++ b/src/NeuralNetwork/Learning/RProp.cpp
@@ -1,101 +1,37 @@
 #include <NeuralNetwork/Learning/RProp.h>

-void NeuralNetwork::Learning::RProp::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
-	network.computeOutput(input);
-	resize();
-	computeSlopes(expectation);
+void NeuralNetwork::Learning::RProp::updateWeightsAndEndBatch() {

-	computeDeltas(input);
-	if(++currentBatchSize >= batchSize) {
-		updateWeights();
-		endBatch();
-		currentBatchSize=0;
-	}
-}
-
-void NeuralNetwork::Learning::RProp::computeSlopes(const std::vector<float> &expectation) {
-	auto& outputLayer=network[network.size()-1];
-	for(std::size_t j=1;j<outputLayer.size();j++) {
-		auto& neuron = outputLayer[j];
-		slopes[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
-									neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
-	}
-
-	for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
-		auto &layer=network[layerIndex];
-
-		for(std::size_t j=1;j<layer.size();j++) {
-			float deltasWeight = 0;
-
-			for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
-				deltasWeight+=slopes[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
-			}
-
-			slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
-		}
-	}
-}
-
-void NeuralNetwork::Learning::RProp::computeDeltas(const std::vector<float> &input) {
-	for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-		auto &layer=network[layerIndex];
-		auto &prevLayer=network[layerIndex-1];
-
-		std::size_t prevLayerSize=prevLayer.size();
-		std::size_t layerSize=layer.size();
-
-		for(std::size_t j=1;j<layerSize;j++)  {
-			float update = slopes[layerIndex][j];
-			for(std::size_t k=0;k<prevLayerSize;k++) {
-				float inputValue = 0.0;
-				if(layerIndex==1 && k!=0) {
-					inputValue = input[k-1];
-				} else {
-					inputValue= prevLayer[k].output();
-				}
-				if(currentBatchSize == 0) {
-					gradients[layerIndex][j][k] = update * inputValue;
-				} else {
-					gradients[layerIndex][j][k] += update * inputValue;
-				}
-			}
-		}
-	}
-}
-
-void NeuralNetwork::Learning::RProp::updateWeights() {
-
-	for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-		auto &layer = network[layerIndex];
-		auto &prevLayer = network[layerIndex - 1];
+	for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+		auto &layer = _network[layerIndex];
+		auto &prevLayer = _network[layerIndex - 1];

 		std::size_t prevLayerSize = prevLayer.size();
 		std::size_t layerSize = layer.size();

 		for(std::size_t j = 1; j < layerSize; j++) {
 			for(std::size_t k = 0; k < prevLayerSize; k++) {
-				float gradient = gradients[layerIndex][j][k];
-				float lastGradient = lastGradients[layerIndex][j][k];
+				float gradient = _gradients[layerIndex][j][k];
+				float lastGradient = _lastGradients[layerIndex][j][k];

-				lastGradients[layerIndex][j][k] = gradient;
+				_lastGradients[layerIndex][j][k] = gradient;

-				float weightChangeDelta = lastWeightChanges[layerIndex][j][k];
+				float weightChangeDelta = _lastWeightChanges[layerIndex][j][k];

 				if(gradient * lastGradient > 0) {
 					weightChangeDelta = std::min(weightChangeDelta*weightChangePlus,maxChangeOfWeights);
 				} else if (gradient * lastGradient < 0) {
 					weightChangeDelta = std::max(weightChangeDelta*weightChangeMinus,minChangeOfWeights);
 				} else {
-					weightChangeDelta = lastWeightChanges[layerIndex][j][k];
+					weightChangeDelta = _lastWeightChanges[layerIndex][j][k];
 				}

-				lastWeightChanges[layerIndex][j][k] = weightChangeDelta;
+				_lastWeightChanges[layerIndex][j][k] = weightChangeDelta;

 				if(gradient > 0) {
 					layer[j].weight(k) += weightChangeDelta;
 				} else if (gradient < 0){
 					layer[j].weight(k) -= weightChangeDelta;
-				} else {
 				}
 			}
 		}