Cascade2 algo implementation

2016-05-13 20:18:43 +02:00
parent 9e2ce222fb
commit 58abfea7a2
2 changed files with 358 additions and 0 deletions
--- a/include/NeuralNetwork/ConstructiveAlgorithms/Cascade2.h
+++ b/include/NeuralNetwork/ConstructiveAlgorithms/Cascade2.h
@@ -0,0 +1,31 @@
 #pragma once
 #include "../Cascade/Network.h"
 #include "../FeedForward/Network.h"
 #include "../Learning/QuickPropagation.h"
 #include "CascadeCorrelation.h"
 #include <random>
 #include <algorithm>
 // http://fann.cvs.sourceforge.net/viewvc/fann/fann/src/fann_cascade.c?view=markup
 // https://github.com/gtomar/cascade
 namespace NeuralNetwork {
 	namespace ConstructiveAlgorihtms {
 		class Cascade2 : public CascadeCorrelation {
 			public:
 				typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
 				Cascade2(std::size_t numberOfCandidate = 18, float maxError = 0.7) : CascadeCorrelation(numberOfCandidate, maxError) {
 				}
 			protected:
 				virtual std::pair<std::shared_ptr<Neuron>, std::vector<float>> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates,
 																					   const std::vector<TrainingPattern> &patterns) override;
 		};
 	}
 }
--- a/src/NeuralNetwork/ConstructiveAlgorithms/Cascade2.cpp
+++ b/src/NeuralNetwork/ConstructiveAlgorithms/Cascade2.cpp
@@ -0,0 +1,327 @@
 #include <NeuralNetwork/ConstructiveAlgorithms/Cascade2.h>
 using namespace NeuralNetwork::ConstructiveAlgorihtms;
 /**
  * Trains every candidate neuron together with a private vector of candidate->output weights
  * and returns the candidate with the highest score of the last training epoch.
  *
  * For each pattern the residual error of the current network (output - target) is computed once.
  * A candidate's score starts at the sum of squared residuals and is reduced by the squared
  * difference between (candidate activation * output weight) and the residual, summed over all
  * patterns and outputs; a higher score therefore means the candidate explains more of the residual.
  *
  * @param network network whose residual error the candidates are fitted to
  * @param candidates candidate neurons; their input weights are updated in place
  * @param patterns training samples (first = input, second = expected output)
  * @returns the best candidate and its output weights multiplied by -1; a null candidate with an
  *          empty weight vector when patterns or candidates is empty
  */
 std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
 																								std::vector<std::shared_ptr<Neuron>> &candidates,
 																								const std::vector<TrainingPattern> &patterns) {
 	// patterns[0] and candidateWeights[bestCandidateIndex] below require non-empty inputs.
 	if(patterns.empty() || candidates.empty()) {
 		return {std::shared_ptr<Neuron>(), std::vector<float>()};
 	}
 	const std::size_t outputs = patterns[0].second.size();
 	// Inputs as seen by a candidate (result of getInnerNeuronsOutput), paired with the expected outputs.
 	std::vector<TrainingPattern> patternsForOutput;
 	patternsForOutput.reserve(patterns.size());
 	float sumSqDiffs=0.0;
 	for(const auto &pattern:patterns) {
 		patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
 	}
 	// errors[pattern][output] = residual of the current network for that pattern and output.
 	std::vector <std::vector<float>> errors(patterns.size());
 	for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
 		const auto &pattern = patterns[patternNumber];
 		errors[patternNumber].resize(network.outputs());
 		const std::vector<float> output = network.computeOutput(pattern.first);
 		for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
 			const float diff = output[outputIndex]-pattern.second[outputIndex];
 			errors[patternNumber][outputIndex] = diff;
 			sumSqDiffs+=diff*diff;
 		}
 	}
 	std::size_t iterations = 0;
 	// Counts all epochs that did not improve on the previous one; it is never reset.
 	std::size_t iterationsWithoutImprovement = 0;
 	float bestCorrelation = 0;
 	float lastCorrelation = 0;
 	std::size_t bestCandidateIndex=0;
 	std::shared_ptr<Neuron> bestCandidate = nullptr;
 	// One non-negative random start weight per (candidate, output).
 	std::vector<std::vector<float>> candidateWeights(candidates.size());
 	for(auto &w: candidateWeights) {
 		w.resize(outputs);
 		for(auto &output: w) {
 			output = fabs(_distribution(_generator))*0.5;
 		}
 	}
 	//compute Correlation Epoch
 	do {
 		lastCorrelation = bestCorrelation;
 		// Reset per epoch so that the first candidate always seeds this epoch's best score.
 		bool firstStep = true;
 		std::size_t candidateIndex=0;
 		for(auto &candidate : candidates) {
 			float score=sumSqDiffs;
 			std::vector<float> slopes(candidate->getWeights().size());
 			std::vector<float> outSlopes(outputs);
 			for(std::size_t patternIndex = 0; patternIndex < patternsForOutput.size(); patternIndex++) {
 				const auto &pattern = patternsForOutput[patternIndex];
 				// The residual row is selected per pattern; every output of this pattern reads the same row.
 				const auto &patternErrors = errors[patternIndex];
 				float errSum = 0.0;
 				const float activationValue =(*candidate)(pattern.first);
 				const float derivatived = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
 				for(std::size_t output = 0; output < outputs; output++) {
 					const float weight = candidateWeights[candidateIndex][output];
 					const float diff = activationValue * weight - patternErrors[output];
 					score -= (diff * diff);
 					outSlopes[output] -= 2.0 * diff * activationValue;
 					errSum += diff * weight;
 				}
 				errSum*= derivatived;
 				for(std::size_t input = 0; input < pattern.first.size(); input++) {
 					slopes[input] -= errSum*pattern.first[input];
 				}
 			}
 			// Batch update with a fixed step of 0.7 averaged over the number of patterns.
 			for(std::size_t weightIndex = 0; weightIndex < slopes.size(); weightIndex++) {
 				candidate->weight(weightIndex) += slopes[weightIndex] * 0.7 / (patterns.size());
 			}
 			for(std::size_t weightIndex = 0; weightIndex < outSlopes.size(); weightIndex++) {
 				candidateWeights[candidateIndex][weightIndex] += outSlopes[weightIndex] * 0.7  / (patterns.size());
 			}
 			if(firstStep || score > bestCorrelation) {
 				bestCorrelation = score;
 				bestCandidate = candidate;
 				firstStep = false;
 				bestCandidateIndex=candidateIndex;
 			}
 			candidateIndex++;
 		}
 		if(bestCorrelation <= lastCorrelation) {
 			iterationsWithoutImprovement++;
 		}
 	}
 	while(iterations++ < _maxCandidateIterations && iterationsWithoutImprovement < _maxCandidateIterationsWithoutChange);
 	std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
 	// The weights were fitted to (output - target), so the sign is flipped before they are handed back.
 	for(auto &a : candidateWeights[bestCandidateIndex]) {
 		a*=-1.0;
 	}
 	return {bestCandidate, candidateWeights[bestCandidateIndex]};
 }
 /*
 *
 std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
 																										  std::vector<std::shared_ptr<Neuron>> &candidates,
 																										  const std::vector<TrainingPattern> &patterns) {
 	std::size_t outputs = patterns[0].second.size();
 	std::vector<TrainingPattern> patternsForOutput;
 	float sumSqDiffs=0.0;
 	for(auto &pattern:patterns) {
 		patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
 	}
 	std::vector<float> errors(patterns.size());
 	for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
 		auto &pattern = patterns[patternNumber];
 		std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
 		for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
 			float diff = output[outputIndex]-pattern.second[outputIndex];
 			errors[outputIndex] += diff;
 			sumSqDiffs+=diff*diff;
 		}
 	}
 	std::size_t iterations = 0;
 	std::size_t iterationsWithoutIprovement = 0;
 	float bestCorrelation = 0;
 	float lastCorrelation = 0;
 	std::size_t bestCandidateIndex=0;
 	std::shared_ptr<Neuron> bestCandidate = nullptr;
 	std::vector<std::vector<float>> candidateWeights(candidates.size());
 	for(auto &w: candidateWeights) {
 		w.resize(outputs);
 		for(auto &output: w) {
 			output = fabs(_distribution(_generator));
 		}
 	}
 	//compute Correlation Epoch
 	do {
 		lastCorrelation = bestCorrelation;
 		bool firstStep = true;
 		std::size_t candidateIndex=0;
 		for(auto &candidate : candidates) {
 			float score=sumSqDiffs;
 			std::vector<float> slopes(candidate->getWeights().size());
 			std::vector<float> outSlopes(outputs);
 			for(auto &pattern:patternsForOutput) {
 				float errSum = 0.0;
 				float activationValue =(*candidate)(pattern.first);
 				float derivatived = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
 				for(std::size_t output = 0; output < outputs; output++) {
 					float weight = candidateWeights[candidateIndex][output];
 					float diff = activationValue * weight - errors[output];
 					float goalDir= pattern.second[output] <0.0? -1.0 :1.0;
 					float diffDir= diff >0.0? -1.0 :1.0;
 					score -= (diff * diff);
 					outSlopes[output] += diff * activationValue;
 					errSum += diff * weight;
 				}
 				errSum*= derivatived;
 				for(std::size_t input = 0; input < pattern.first.size(); input++) {
 					slopes[input] += errSum*pattern.first[input];
 				}
 			}
 			for(std::size_t weightIndex = 0; weightIndex < slopes.size(); weightIndex++) {
 				candidate->weight(weightIndex) += slopes[weightIndex] * 0.7/ (patterns.size() * patterns[0].first.size());
 			}
 			for(std::size_t weightIndex = 0; weightIndex < outSlopes.size(); weightIndex++) {
 				candidateWeights[candidateIndex][weightIndex] += outSlopes[weightIndex] * 0.7/ (patterns.size() * patterns[0].first.size());
 			}
 			if(firstStep || score > bestCorrelation) {
 				bestCorrelation = score;
 				bestCandidate = candidate;
 				firstStep = false;
 				bestCandidateIndex=candidateIndex;
 			}
 			candidateIndex++;
 		}
 		if(bestCorrelation <= lastCorrelation) {
 			iterationsWithoutIprovement++;
 		}
 	}
 	while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
 	std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
 	for(auto &a : candidateWeights[bestCandidateIndex]) {
 		a*=-1.0;
 	}
 	return {bestCandidate, candidateWeights[bestCandidateIndex]};
 }
 */
 /*
 std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
 																								std::vector<std::shared_ptr<Neuron>> &candidates,
 																								const std::vector<TrainingPattern> &patterns) {
 	std::size_t outputs = patterns[0].second.size();
 	std::vector<TrainingPattern> patternsForOutput;
 	std::vector<FeedForward::Network*> patternNets;
 	for(auto &pattern:patterns) {
 		patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
 	}
 	std::vector<float> errors(patterns.size());
 	for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
 		auto &pattern = patterns[patternNumber];
 		std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
 		patternNets.push_back(new FeedForward::Network(patternsForOutput[patternNumber].first.size()-1));
 		auto patternNetwork = patternNets.back();
 		auto &hidden = patternNetwork->appendLayer(2);
 		auto &outputLayer = patternNetwork->appendLayer(outputs);
 		for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
 			outputLayer[outputIndex+1].weight(0) = network.getOutputNeurons()[outputIndex]->value();
 			float diff = pattern.second[outputIndex] - output[outputIndex];
 			errors[outputIndex] += diff;
 		}
 	}
 	std::size_t iterations = 0;
 	std::size_t iterationsWithoutIprovement = 0;
 	float bestCorrelation = 0;
 	float lastCorrelation = 0;
 	std::size_t bestCandidateIndex=0;
 	std::vector<std::vector<float>> candidateWeights(candidates.size());
 	for(auto &w: candidateWeights) {
 		w.resize(outputs);
 		for(auto &output: w) {
 			output = fabs(_distribution(_generator));
 		}
 	}
 	std::vector<float>candidateScores(candidates.size());
 	//compute Correlation Epoch
 	do {
 		std::fill(candidateScores.begin(),candidateScores.end(),0.0);
 		lastCorrelation = bestCorrelation;
 		for(std::size_t patternIndex=0;patternIndex<patternsForOutput.size();patternIndex++) {
 			std::size_t candidateIndex=0;
 			auto &pattern = patternsForOutput[patternIndex];
 			auto net = patternNets[patternIndex];
 			Learning::BackPropagation bp(*net);
 			for(auto &candidate : candidates) {
 				float score = 0;
 				(*net)[1][1].setWeights(candidate->getWeights());
 				for(std::size_t outputNeuron=0;outputNeuron<outputs;outputNeuron++) {
 					(*net)[2][outputNeuron+1].weight(1)=candidateWeights[candidateIndex][outputNeuron];
 				}
 				bp.teach(pattern.first,pattern.second);
 				auto res = net->computeOutput(pattern.first);
 				for(std::size_t outputNeuron=0;outputNeuron<outputs;outputNeuron++) {
 					candidateWeights[candidateIndex][outputNeuron]=(*net)[2][outputNeuron+1].weight(1);
 					candidateScores[candidateIndex]+=res[outputNeuron]*res[outputNeuron];
 				}
 				candidate->setWeights((*net)[1][1].getWeights());
 				candidateIndex++;
 			}
 		}
 		bestCorrelation=candidateScores[0];
 		bestCandidateIndex=0;
 		for(std::size_t index=1;index < candidateScores.size();index++) {
 			if(bestCorrelation > candidateScores[index]) {
 				bestCandidateIndex = index;
 			}
 		}
 		if(bestCorrelation <= lastCorrelation) {
 			iterationsWithoutIprovement++;
 		}
 	}
 	while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
 	std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
 	for(auto &net:patternNets) {
 		delete net;
 	}
 	return {candidates[bestCandidateIndex], candidateWeights[bestCandidateIndex]};
 }
 */