Rprop implementation

2016-10-30 23:00:50 +01:00
parent 554ef1b46b
commit 8749b3eb03
5 changed files with 415 additions and 3 deletions

CMakeLists.txt

@@ -64,6 +64,7 @@ set (LIBRARY_SOURCES
    src/NeuralNetwork/Learning/BackPropagation.cpp
    src/NeuralNetwork/Learning/QuickPropagation.cpp
    src/NeuralNetwork/Learning/PerceptronLearning.cpp
    src/NeuralNetwork/Learning/RProp.cpp
    src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
    src/NeuralNetwork/ConstructiveAlgorithms/Cascade2.cpp
@@ -118,6 +119,9 @@ IF(ENABLE_TESTS)
    add_test(quickpropagation tests/quickpropagation)
    set_property(TEST quickpropagation PROPERTY LABELS unit)
    add_test(rprop tests/rprop)
    set_property(TEST rprop PROPERTY LABELS unit)
    add_test(recurrent tests/recurrent)
    set_property(TEST recurrent PROPERTY LABELS unit)
@@ -136,8 +140,5 @@ IF(ENABLE_TESTS)
    add_test(recurrent_perf tests/recurrent_perf)
    set_property(TEST recurrent_perf PROPERTY LABELS perf)
    add_test(genetic_programing tests/genetic_programing)
    set_property(TEST genetic_programing PROPERTY LABELS unit)
ENDIF(ENABLE_TESTS)

NeuralNetwork/Learning/RProp.h (new file)

@@ -0,0 +1,140 @@
#pragma once

#include <algorithm> // std::fill, std::min, std::max
#include <cmath>
#include <vector>

#include <NeuralNetwork/FeedForward/Network.h>
#include "CorrectionFunction/Linear.h"

namespace NeuralNetwork {
namespace Learning {
/** @class RProp
 * @brief Resilient backpropagation (Rprop) learning for feed-forward networks:
 * every weight keeps its own step size, adapted from the sign of successive batch gradients.
 */
class RProp {
public:
    RProp(FeedForward::Network &feedForwardNetwork, CorrectionFunction::CorrectionFunction *correction = new CorrectionFunction::Linear()) :
            network(feedForwardNetwork), correctionFunction(correction) {
        resize();
    }

    virtual ~RProp() {
        delete correctionFunction;
    }

    RProp(const RProp&) = delete;
    RProp& operator=(const RProp&) = delete;

    void teach(const std::vector<float> &input, const std::vector<float> &output);

    std::size_t getBatchSize() const {
        return batchSize;
    }

    void setBatchSize(std::size_t size) {
        batchSize = size;
    }

    void setInitialWeightChange(float init) {
        initialWeightChange = init;
    }

protected:
    /// Grows the per-layer buffers whenever the network topology changes.
    virtual inline void resize() {
        if(slopes.size() != network.size())
            slopes.resize(network.size());
        for(std::size_t i = 0; i < network.size(); i++) {
            if(slopes[i].size() != network[i].size())
                slopes[i].resize(network[i].size());
        }

        if(gradients.size() != network.size())
            gradients.resize(network.size());

        bool resized = false;
        for(std::size_t i = 0; i < network.size(); i++) {
            if(gradients[i].size() != network[i].size()) {
                gradients[i].resize(network[i].size());
                resized = true;
                if(i > 0) {
                    for(std::size_t j = 0; j < gradients[i].size(); j++) {
                        gradients[i][j].resize(network[i - 1].size());
                        std::fill(gradients[i][j].begin(), gradients[i][j].end(), 0.0);
                    }
                }
            }
        }

        if(resized) {
            lastGradients = gradients;

            if(changesOfWeightChanges.size() != network.size())
                changesOfWeightChanges.resize(network.size());
            for(std::size_t i = 0; i < network.size(); i++) {
                if(changesOfWeightChanges[i].size() != network[i].size()) {
                    changesOfWeightChanges[i].resize(network[i].size());
                    if(i > 0) {
                        for(std::size_t j = 0; j < changesOfWeightChanges[i].size(); j++) {
                            changesOfWeightChanges[i][j].resize(network[i - 1].size());
                            std::fill(changesOfWeightChanges[i][j].begin(), changesOfWeightChanges[i][j].end(), initialWeightChange);
                        }
                    }
                }
            }

            if(lastWeightChanges.size() != network.size())
                lastWeightChanges.resize(network.size());
            for(std::size_t i = 0; i < network.size(); i++) {
                if(lastWeightChanges[i].size() != network[i].size()) {
                    lastWeightChanges[i].resize(network[i].size());
                    if(i > 0) {
                        for(std::size_t j = 0; j < lastWeightChanges[i].size(); j++) {
                            lastWeightChanges[i][j].resize(network[i - 1].size());
                            std::fill(lastWeightChanges[i][j].begin(), lastWeightChanges[i][j].end(), 0.1);
                        }
                    }
                }
            }
        }
    }

    virtual void computeSlopes(const std::vector<float> &expectation);
    virtual void computeDeltas(const std::vector<float> &input);
    void updateWeights();

    virtual void endBatch() {
    }

    FeedForward::Network &network;
    CorrectionFunction::CorrectionFunction *correctionFunction;

    std::vector<std::vector<float>> slopes;
    std::vector<std::vector<std::vector<float>>> gradients = {};
    std::vector<std::vector<std::vector<float>>> lastGradients = {};
    std::vector<std::vector<std::vector<float>>> lastWeightChanges = {};
    std::vector<std::vector<std::vector<float>>> changesOfWeightChanges = {};

    std::size_t batchSize = 1;
    std::size_t currentBatchSize = 0;

    float maxChangeOfWeights = 50;      // upper bound on a per-weight step size
    float minChangeOfWeights = 0.0001;  // lower bound on a per-weight step size
    float initialWeightChange = 0.02;
    float weightChangePlus = 1.2;       // step-size growth factor (eta+)
    float weightChangeMinus = 0.5;      // step-size shrink factor (eta-)
};
}
}
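
The growth and shrink factors default to the values recommended in Riedmiller and Braun's original Rprop paper (weightChangePlus = 1.2, weightChangeMinus = 0.5, with step sizes clamped to [0.0001, 50]). A minimal usage sketch of the public API follows; main() is hypothetical and the Sigmoid include path is assumed, but every Network and RProp call appears in tests/rprop.cpp later in this commit.

#include <NeuralNetwork/FeedForward/Network.h>
#include <NeuralNetwork/ActivationFunction/Sigmoid.h> // path assumed
#include <NeuralNetwork/Learning/RProp.h>

int main() {
    NeuralNetwork::FeedForward::Network net(2);              // two inputs
    NeuralNetwork::ActivationFunction::Sigmoid sigmoid(-1);
    net.appendLayer(3, sigmoid);                             // hidden layer
    net.appendLayer(1, sigmoid);                             // output layer
    net.randomizeWeights();

    NeuralNetwork::Learning::RProp trainer(net);
    trainer.setBatchSize(4);                                 // one weight update per 4 patterns
    for(int i = 0; i < 1000; i++) {                          // teach XOR
        trainer.teach({0, 0}, {0});
        trainer.teach({0, 1}, {1});
        trainer.teach({1, 0}, {1});
        trainer.teach({1, 1}, {0});
    }
    return net.computeOutput({1, 0})[0] > 0.9 ? 0 : 1;       // 0 on success
}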

src/NeuralNetwork/Learning/RProp.cpp (new file)

@@ -0,0 +1,103 @@
#include <NeuralNetwork/Learning/RProp.h>

void NeuralNetwork::Learning::RProp::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
    network.computeOutput(input);
    resize();
    computeSlopes(expectation);
    computeDeltas(input);
    if(++currentBatchSize >= batchSize) {
        updateWeights();
        endBatch();
        currentBatchSize = 0;
    }
}

void NeuralNetwork::Learning::RProp::computeSlopes(const std::vector<float> &expectation) {
    // Output layer: slope = error derivative * activation derivative (index 0 is the bias unit).
    auto &outputLayer = network[network.size() - 1];
    for(std::size_t j = 1; j < outputLayer.size(); j++) {
        auto &neuron = outputLayer[j];
        slopes[network.size() - 1][j] = correctionFunction->operator()(expectation[j - 1], neuron.output()) *
            neuron.getActivationFunction().derivatedOutput(neuron.value(), neuron.output());
    }
    // Hidden layers: back-propagate the slopes through the outgoing weights.
    for(int layerIndex = static_cast<int>(network.size() - 2); layerIndex > 0; layerIndex--) {
        auto &layer = network[layerIndex];
        for(std::size_t j = 1; j < layer.size(); j++) {
            float deltasWeight = 0;
            for(std::size_t k = 1; k < network[layerIndex + 1].size(); k++) {
                deltasWeight += slopes[layerIndex + 1][k] * network[layerIndex + 1][k].weight(j);
            }
            slopes[layerIndex][j] = deltasWeight * layer[j].getActivationFunction().derivatedOutput(layer[j].value(), layer[j].output());
        }
    }
}

void NeuralNetwork::Learning::RProp::computeDeltas(const std::vector<float> &input) {
    for(std::size_t layerIndex = 1; layerIndex < network.size(); layerIndex++) {
        auto &layer = network[layerIndex];
        auto &prevLayer = network[layerIndex - 1];
        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();
        for(std::size_t j = 1; j < layerSize; j++) {
            float update = slopes[layerIndex][j];
            for(std::size_t k = 0; k < prevLayerSize; k++) {
                float inputValue = 0.0;
                if(layerIndex == 1 && k != 0) {
                    inputValue = input[k - 1];
                } else {
                    inputValue = prevLayer[k].output();
                }
                // Reset the accumulated gradient on the first pattern of a batch, accumulate afterwards.
                if(currentBatchSize == 0) {
                    gradients[layerIndex][j][k] = update * inputValue;
                } else {
                    gradients[layerIndex][j][k] += update * inputValue;
                }
            }
        }
    }
}

void NeuralNetwork::Learning::RProp::updateWeights() {
    for(std::size_t layerIndex = 1; layerIndex < network.size(); layerIndex++) {
        auto &layer = network[layerIndex];
        auto &prevLayer = network[layerIndex - 1];
        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();
        for(std::size_t j = 1; j < layerSize; j++) {
            for(std::size_t k = 0; k < prevLayerSize; k++) {
                float gradient = gradients[layerIndex][j][k];
                float lastGradient = lastGradients[layerIndex][j][k];
                lastGradients[layerIndex][j][k] = gradient;

                float weightChangeDelta = lastWeightChanges[layerIndex][j][k];
                if(gradient * lastGradient > 0) {
                    // Same sign as the previous batch: accelerate.
                    weightChangeDelta = std::min(weightChangeDelta * weightChangePlus, maxChangeOfWeights);
                } else if(gradient * lastGradient < 0) {
                    // Sign flipped: the last step overshot a minimum, slow down.
                    weightChangeDelta = std::max(weightChangeDelta * weightChangeMinus, minChangeOfWeights);
                }
                lastWeightChanges[layerIndex][j][k] = weightChangeDelta;

                // Step in the direction given by the gradient sign; a zero gradient leaves the weight unchanged.
                if(gradient > 0) {
                    layer[j].weight(k) += weightChangeDelta;
                } else if(gradient < 0) {
                    layer[j].weight(k) -= weightChangeDelta;
                }
            }
        }
    }
}
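
For reference, here is the per-weight rule that updateWeights() applies, distilled into a self-contained sketch (not part of the commit; RpropState and rpropStep are illustrative names). On a sign flip the step size shrinks but the weight still moves with the new gradient's sign; the classic Rprop weight-backtracking step is omitted.

#include <algorithm>

struct RpropState {
    float lastGradient = 0.0f; // accumulated gradient of the previous batch
    float stepSize = 0.1f;     // current per-weight step (lastWeightChanges starts at 0.1)
};

// Returns the weight change for one batch gradient, mirroring updateWeights().
inline float rpropStep(RpropState &s, float gradient,
                       float etaPlus = 1.2f, float etaMinus = 0.5f,
                       float minStep = 0.0001f, float maxStep = 50.0f) {
    if(gradient * s.lastGradient > 0)       // same sign: accelerate
        s.stepSize = std::min(s.stepSize * etaPlus, maxStep);
    else if(gradient * s.lastGradient < 0)  // sign flip: overshot, slow down
        s.stepSize = std::max(s.stepSize * etaMinus, minStep);
    s.lastGradient = gradient;
    if(gradient > 0) return  s.stepSize;
    if(gradient < 0) return -s.stepSize;
    return 0.0f;                            // zero gradient: leave the weight alone
}

Only the sign of the gradient matters, not its magnitude: starting from 0.1, two batches with agreeing signs grow the step to 0.12 and then 0.144, while a subsequent sign flip halves it to 0.072.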

tests/CMakeLists.txt

@@ -28,6 +28,9 @@ target_link_libraries(recurrent NeuralNetwork gtest gtest_main)
add_executable(quickpropagation quickpropagation.cpp)
target_link_libraries(quickpropagation NeuralNetwork gtest gtest_main)
add_executable(rprop rprop.cpp)
target_link_libraries(rprop NeuralNetwork gtest gtest_main)
# PERF
add_executable(backpropagation_function_cmp backpropagation_function_cmp.cpp)

tests/rprop.cpp (new file)

@@ -0,0 +1,165 @@
#include <cstdlib>
#include <ctime>

#include <NeuralNetwork/FeedForward/Network.h>
#include <NeuralNetwork/Learning/RProp.h>
// Sigmoid is used below but was not included; the path is assumed to mirror HyperbolicTangent.h.
#include <NeuralNetwork/ActivationFunction/Sigmoid.h>
#include <NeuralNetwork/ActivationFunction/HyperbolicTangent.h>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Weffc++"
#include <gtest/gtest.h>
#pragma GCC diagnostic pop

TEST(RProp, XOR) {
    NeuralNetwork::FeedForward::Network n(2);
    NeuralNetwork::ActivationFunction::Sigmoid a(-1);
    n.appendLayer(3, a);
    n.appendLayer(1, a);
    n.randomizeWeights();

    NeuralNetwork::Learning::RProp prop(n);
    prop.setBatchSize(4);
    for(int i = 0; i < 100; i++) {
        prop.teach({1, 0}, {1});
        prop.teach({1, 1}, {0});
        prop.teach({0, 0}, {0});
        prop.teach({0, 1}, {1});
    }

    {
        std::vector<float> ret = n.computeOutput({1, 1});
        ASSERT_LT(ret[0], 0.1);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 1});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({1, 0});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 0});
        ASSERT_LT(ret[0], 0.1);
    }
}

TEST(RProp, XORHyperbolicTangent) {
    srand(time(NULL));
    NeuralNetwork::FeedForward::Network n(2);
    NeuralNetwork::ActivationFunction::HyperbolicTangent a(-1);
    n.appendLayer(2, a);
    n.appendLayer(1, a);
    n.randomizeWeights();

    NeuralNetwork::Learning::RProp prop(n);
    prop.setBatchSize(4);
    for(int i = 0; i < 15000; i++) {
        prop.teach({1, 0}, {1});
        prop.teach({1, 1}, {0});
        prop.teach({0, 0}, {0});
        prop.teach({0, 1}, {1});
    }

    {
        std::vector<float> ret = n.computeOutput({1, 1});
        ASSERT_LT(ret[0], 0.1);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 1});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({1, 0});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 0});
        ASSERT_LT(ret[0], 0.1);
    }
}

TEST(RProp, AND) {
    NeuralNetwork::FeedForward::Network n(2);
    NeuralNetwork::ActivationFunction::Sigmoid a(-1);
    n.appendLayer(1, a);
    n.randomizeWeights();

    NeuralNetwork::Learning::RProp prop(n);
    prop.setBatchSize(4);
    for(int i = 0; i < 100000; i++) {
        prop.teach({1, 1}, {1});
        prop.teach({0, 0}, {0});
        prop.teach({0, 1}, {0});
        prop.teach({1, 0}, {0});
    }

    {
        std::vector<float> ret = n.computeOutput({1, 1});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 1});
        ASSERT_LT(ret[0], 0.1);
    }
    {
        std::vector<float> ret = n.computeOutput({1, 0});
        ASSERT_LT(ret[0], 0.1);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 0});
        ASSERT_LT(ret[0], 0.1);
    }
}

TEST(RProp, NOTAND) {
    NeuralNetwork::FeedForward::Network n(2);
    NeuralNetwork::ActivationFunction::Sigmoid a(-1);
    n.appendLayer(2, a);
    n.appendLayer(1, a);
    n.randomizeWeights();

    NeuralNetwork::Learning::RProp prop(n);
    prop.setBatchSize(4);
    for(int i = 0; i < 100000; i++) {
        prop.teach({1, 1}, {0});
        prop.teach({0, 0}, {1});
        prop.teach({0, 1}, {1});
        prop.teach({1, 0}, {1});
    }

    {
        std::vector<float> ret = n.computeOutput({1, 1});
        ASSERT_LT(ret[0], 0.1);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 1});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({1, 0});
        ASSERT_GT(ret[0], 0.9);
    }
    {
        std::vector<float> ret = n.computeOutput({0, 0});
        ASSERT_GT(ret[0], 0.9);
    }
}