cascade...

2016-05-09 21:52:39 +02:00
parent 5a7f10ba81
commit 8ae08c4b94
2 changed files with 122 additions and 81 deletions
--- a/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h
+++ b/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h
@@ -41,7 +41,7 @@ namespace NeuralNetwork {
 						addBestCandidate(network, candidate);

 						if(_maxRandomOutputWeights) {
-							error = trainOutputsRandom(0, network, patterns);
+							error = trainOutputsRandom(_epoch, network, patterns);
 						} else {
 							error = trainOutputs(network, patterns);
 						}
@@ -88,7 +88,7 @@ namespace NeuralNetwork {
 				}

 			protected:
-				std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-4.9);
+				std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-0.8);
 				float _minimalErrorStep = 0.00005;
 				float _maxError;
 				float _weightRange;
@@ -98,9 +98,9 @@ namespace NeuralNetwork {
 				std::size_t _maxRandomOutputWeights = 0;
 				std::size_t _numberOfCandidates;
 				std::size_t _maxOutpuLearningIterations = 1000;
-				std::size_t _maxOutpuLearningIterationsWithoutChange = 5;
-				std::size_t _maxCandidateIterations = 20;
-				std::size_t _maxCandidateIterationsWithoutChange = 5;
+				std::size_t _maxOutpuLearningIterationsWithoutChange = 100;
+				std::size_t _maxCandidateIterations = 4000;
+				std::size_t _maxCandidateIterationsWithoutChange = 15;

 				std::mt19937 _generator;
 				std::uniform_real_distribution<> _distribution;
@@ -139,9 +139,9 @@ namespace NeuralNetwork {
 						for(auto &weight: weights) {
 							weight *= 0.9;
 						}
-						weights[weights.size() - 1] = -candidate.second[outIndex] / weightPortion;//_distribution(_generator);
 						outIndex++;
 						n->setWeights(weights);
+						n->weight(n->getWeights().size() - 1) = -candidate.second[outIndex] / weightPortion;
 					}
 				}

@@ -154,7 +154,7 @@ namespace NeuralNetwork {
 						candidates.back()->setActivationFunction(*_activFunction.get());

 						for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
-							candidates.back()->weight(weightIndex) = _distribution(_generator);
+							candidates.back()->weight(weightIndex) = _distribution(_generator) * 3.0;
 						}
 					}
 					return candidates;
--- a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
+++ b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
@@ -10,8 +10,8 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 	Learning::BackPropagation learner(p);

 	for(std::size_t neuron = 0; neuron < outputs; neuron++) {
-		p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
-		p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
+		p[1][neuron + 1].setWeights(network.getOutputNeurons()[neuron]->getWeights());
+		p[1][neuron + 1].setActivationFunction(network.getOutputNeurons()[neuron]->getActivationFunction());
 	}

 	std::vector<TrainingPattern> patternsForOutput;
@@ -46,16 +46,18 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 	}
 	while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);

+	std::cout << "outputLearning: " << error << ", last: " << lastError << ", iters: " << iteration << "\n";
 	for(std::size_t neuron = 0; neuron < outputs; neuron++) {
-		network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
+		network.getOutputNeurons()[neuron]->setWeights(p[1][neuron + 1].getWeights());
 	}
 	return error;
 }

+
 float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
 	std::size_t outputs = patterns[0].second.size();

-	std::vector<FeedForward::Network*> possibleOutputs;
+	std::vector<FeedForward::Network *> possibleOutputs;
 	{ // first networks is special
 		possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
 		FeedForward::Network &p = (*possibleOutputs.back());
@@ -68,17 +70,17 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network

 	}

-	std::size_t generatedNets =0;
+	std::size_t generatedNets = 0;

-	if(step ==0 ) {
-		generatedNets=_maxRandomOutputWeights;
-	} else if(step % 15 ==0 ){
-		generatedNets=_maxRandomOutputWeights;
+	if(step == 0) {
+		generatedNets = _maxRandomOutputWeights;
+	} else if(step % 15 == 0) {
+		generatedNets = _maxRandomOutputWeights;
 	} else {
-		generatedNets=_maxRandomOutputWeights/step;
+		generatedNets = _maxRandomOutputWeights / step;
 	}

-	for(std::size_t net =0;net < generatedNets;net++) {
+	for(std::size_t net = 0; net < generatedNets; net++) {
 		possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
 		FeedForward::Network &p = (*possibleOutputs.back());
 		p.appendLayer(outputs);
@@ -98,10 +100,10 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network

 	std::size_t bestNetwork = 0;
 	float bestScore = std::numeric_limits<float>::max();
-	std::size_t index=0;
+	std::size_t index = 0;

 	for(auto &net : possibleOutputs) {
-		auto &p=*net;
+		auto &p = *net;
 		Learning::BackPropagation learner(p);

 		float lastError;
@@ -130,8 +132,8 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
 		}
 		while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
 		if(error < bestScore) {
-			bestScore=error;
-			bestNetwork=index;
+			bestScore = error;
+			bestNetwork = index;
 		}
 		index++;
 	}
@@ -146,6 +148,7 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
 	return bestScore;
 }

+
 std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network,
 																										  std::vector<std::shared_ptr<Neuron>> &candidates,
 																										  const std::vector<TrainingPattern> &patterns) {
@@ -159,107 +162,145 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor

 	std::vector<std::vector<float>> errors(patterns.size());
 	std::vector<float> meanErrors(outputs);
-	float sumSquareError=0;
+	float sumSquareError = 0;

+	std::vector<std::vector<float>> errorsReal(patterns.size());
 	for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
 		auto &pattern = patterns[patternNumber];
 		errors[patternNumber].resize(network.outputs());
+		errorsReal[patternNumber].resize(network.outputs());

 		std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
 		for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
-			//float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
+			float diff = output[outputIndex] - pattern.second[outputIndex];
+			//float diff = pattern.second[outputIndex] - output[outputIndex];
+
 			auto neuron = network.getOutputNeurons()[outputIndex];
-			float error = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output())*(output[outputIndex] - pattern.second[outputIndex]);
+			float derivation = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output());
+			float error = derivation * diff;

 			errors[patternNumber][outputIndex] = error;
+			errorsReal[patternNumber][outputIndex] = error;
 			meanErrors[outputIndex] += error;
-			sumSquareError+=error*error;
+			sumSquareError += error * error;
 		}
 	}

 	std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n) { n /= patterns.size(); });

+	struct CAND { // per-candidate bookkeeping kept while the candidate neurons are trained
+		std::vector<float> correlations = {}; // per-output running accumulator for the current pass; reset to 0 once folded into lastCorrelations
+		std::vector<float> lastCorrelations = {}; // per-output correlation from the previous pass; its sign picks the update direction
+		std::vector<float> slopes = {}; // accumulated weight change per candidate input; added to the weights, then zeroed
+		float sumVals = 0; // sum of the candidate's activations over the patterns of the current pass
+		std::shared_ptr<Neuron> candidate = nullptr; // the candidate neuron this record belongs to
+	};
+
+	std::vector<CAND> candidatesRegister(candidates.size());
+
+	for(std::size_t i = 0; i < candidates.size(); i++) {
+		candidatesRegister[i].candidate = candidates[i];
+		candidatesRegister[i].correlations.resize(outputs);
+		candidatesRegister[i].lastCorrelations.resize(outputs);
+		candidatesRegister[i].slopes.resize(patternsForOutput[0].first.size());
+	}
+
 	std::size_t iterations = 0;
 	std::size_t iterationsWithoutIprovement = 0;
 	float bestCorrelation = 0;
 	float lastCorrelation = 0;
 	std::shared_ptr<Neuron> bestCandidate = nullptr;

-	std::vector<float> bestCorrelations(errors[0].size());
+	std::vector<float> bestCorrelations(outputs);
+
+	for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) {
+		for(auto &candidateStruct : candidatesRegister) {
+			float value = (*candidateStruct.candidate)(patternsForOutput[patternIndex].first);
+			candidateStruct.sumVals += value;
+			for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
+				candidateStruct.correlations[outputIndex] -= value * meanErrors[outputIndex];
+			}
+		}
+	}
+
+	for(auto &candidate : candidatesRegister) {
+		float score = 0.0;
+		float aveValue = candidate.sumVals / patterns.size();
+		for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
+			float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError;
+			candidate.lastCorrelations[outputIndex] = correlation;
+			candidate.correlations[outputIndex] = 0;
+			candidate.sumVals = 0;
+			score += fabs(correlation);
+		}
+	}

 	do {
 		lastCorrelation = bestCorrelation;
-		bool firstStep = true;
-		for(auto &candidate : candidates) {
-			float correlation;

-			std::vector<float> activations;
-			std::vector<float> correlations(errors[0].size());
-			std::vector<float> correlationSigns(errors[0].size());
+		/*cascor_cand_epoch*/
+		for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) {
+			for(auto &candidateStruct : candidatesRegister) {
+				auto candidate = candidateStruct.candidate;
+				float change = 0;
+				float activation = (*candidate)(patternsForOutput[patternIndex].first);
+				candidateStruct.sumVals += activation;

-			float activationSum=0.0;
-			for(auto &pattern:patternsForOutput) {
-				activations.push_back((*candidate)(pattern.first));
-				activationSum+=activations.back();
+				float derivation = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
+				for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
+					float error = errors[patternIndex][outputIndex];
+
+					float direction = candidateStruct.lastCorrelations[outputIndex] < 0.0 ? -1.0 : 1.0;
+
+					change -= direction * derivation * (error - meanErrors[outputIndex]);
+
+					candidateStruct.correlations[outputIndex] -= error * activation;
 				}

-			activationSum/=patternsForOutput.size();
-
-			for(std::size_t err = 0; err < meanErrors.size(); err++) {
-				for(std::size_t activ = 0; activ < activations.size(); activ++) {
-					correlations[err] += (activations[activ]*errors[activ][err] - activationSum * meanErrors[err]);
+				for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) {
+					candidateStruct.slopes[i] += change * patternsForOutput[patternIndex].first[i];
+				}
 			}
-				correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
 		}

-			correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); });
-
-			if(std::isnan(correlation)) {
-				correlation=-5000;
+		/*adjust ci_weights*/
+		for(auto &candidateStruct : candidatesRegister) {
+			auto candidate = candidateStruct.candidate;
+			for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) {
+				candidate->weight(i) += candidateStruct.slopes[i] * 2;
+				candidateStruct.slopes[i] = 0.0;
+			}
 		}

-			std::vector<float> derivatives(candidate->getWeights().size());
-			for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
-				float dcdw = 0.0;
-
-				for(std::size_t err = 0; err < errors.size(); err++) {
-					float thetaO = 0.0;
-					for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
-						(*candidate)(patternsForOutput[err].first);
-						float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
-						if (std::isnan(derivative)) {
-							//std::cout << "isNan\n";
-							derivative =1;
-						}
-						thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * patternsForOutput[err].first[input];
-					}
-					dcdw += thetaO;
-				}
-				if(std::isnan(dcdw)) {
-					dcdw=0.1;
-				}
-				derivatives[input] = dcdw;
+		/* adjust correlations*/
+		bestCorrelation = 0;
+		bool step = true;
+		for(auto &candidate : candidatesRegister) {
+			float score = 0.0;
+			float aveValue = candidate.sumVals / patterns.size();
+			for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
+				float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError;
+				candidate.lastCorrelations[outputIndex] = correlation;
+				candidate.correlations[outputIndex] = 0;
+				candidate.sumVals = 0;
+				score += fabs(correlation);
 			}

-			for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
-				candidate->weight(weightIndex) += derivatives[weightIndex] * 0.7;
-			}
-
-			if(firstStep || correlation > bestCorrelation) {
-				bestCorrelation = correlation;
-				bestCandidate = candidate;
-				std::swap(bestCorrelations, correlations);
-				firstStep = false;
+			if(score > bestCorrelation || step) {
+				bestCandidate = candidate.candidate;
+				bestCorrelation = score;
+				bestCorrelations = candidate.lastCorrelations;
+				step = false;
 			}
 		}

 		if(bestCorrelation <= lastCorrelation) {
 			iterationsWithoutIprovement++;
 		}
-
+//		std::cout << "sub iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
 	}
 	while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
-	//std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
+	std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";

 	return {bestCandidate, bestCorrelations};
 }