cascade correlation: refactoring

2016-05-07 21:17:57 +02:00
parent 36ce3f6463
commit eaafc27211
2 changed files with 61 additions and 58 deletions
--- a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
+++ b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp
@@ -8,14 +8,13 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 	FeedForward::Network p(network.getNeuronSize() - outputs-1);
 	p.appendLayer(outputs);
 	Learning::QuickPropagation learner(p);
-	learner.setLearningCoefficient(0.9);

 	for(std::size_t neuron = 0; neuron < outputs; neuron++) {
 		p[1][neuron+1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
 		p[1][neuron+1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
 	}

-	std::cout << p.stringify() << "\n";
+	//std::cout << p.stringify() << "\n";
 	std::vector<TrainingPattern> patternsForOutput;

 	for(auto &pattern:patterns) {
@@ -40,12 +39,12 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 			}
 		}

-		if(lastError - error < _minimalErrorStep) {
+		if(fabs(lastError - error) < _minimalErrorStep) {
 			iterWithoutImporvement++;
 		}else {
 			iterWithoutImporvement=0;
 		}
-	} while (iteration++ < 500 && iterWithoutImporvement < 300);
+	} while (iteration++ < 1000 && iterWithoutImporvement < 3);
 	std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError-error) <<  "\n";

 	for(std::size_t neuron = 0; neuron < outputs; neuron++) {
@@ -54,7 +53,7 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 	return error;
 }

-void CascadeCorrelation::trainCandidates(Cascade::Network &network, std::shared_ptr<Neuron> &candidate, const std::vector<TrainingPattern> &patterns) {
+std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates, const std::vector<TrainingPattern> &patterns) {
 	std::size_t outputs = patterns[0].second.size();

 	std::vector<TrainingPattern> patternsForOutput;
@@ -78,56 +77,67 @@ void CascadeCorrelation::trainCandidates(Cascade::Network &network, std::shared_
 	}

 	std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n){ n/=patterns.size(); });
-
 	std::size_t iterations=0;
 	std::size_t iterationsWithoutIprovement=0;
-	float lastCorrelations=0;
-	float correlation=std::numeric_limits<float>::max();
+	float bestCorrelation=0;
+	float lastCorrelation=0;
+	std::shared_ptr<Neuron> bestCandidate=nullptr;
+
 	do {
-		lastCorrelations=correlation;
+		lastCorrelation = bestCorrelation;
+		bool firstStep=true;
+		for(auto&candidate : candidates) {
+			float correlation;

-		std::vector<float>activations;
-		std::vector<float>correlations(errors[0].size());
-		std::vector<float>correlationSigns(errors[0].size());
+			std::vector<float> activations;
+			std::vector<float> correlations(errors[0].size());
+			std::vector<float> correlationSigns(errors[0].size());

-		for(auto &pattern:patternsForOutput) {
-			activations.push_back((*candidate)(pattern.first));
-		}
-
-		for(std::size_t err=0;err<meanErrors.size();err++) {
-			for(std::size_t activ=0;activ<activations.size();activ++) {
-				correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
+			for(auto &pattern:patternsForOutput) {
+				activations.push_back((*candidate)(pattern.first));
 			}
-			correlationSigns[err] = correlations[err] > 0? 1.0 : -1.0;
-		}

-		correlation = std::accumulate(correlations.begin(), correlations.end(),0.0);
-		std::vector<float> derivatives(candidate->getWeights().size());
-		for (std::size_t input=0;input<candidate->getWeights().size();input++)  {
-			float dcdw = 0.0;
-
-			for(std::size_t err=0;err<errors.size();err++) {
-				float thetaO = 0.0;
-				for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
-					(*candidate)(patternsForOutput[err].first);
-					float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
-					thetaO+=correlationSigns[meanError] * (errors[err][meanError] - meanErrors [meanError]) * derivative * candidate->weight(input);
+			for(std::size_t err = 0; err < meanErrors.size(); err++) {
+				for(std::size_t activ = 0; activ < activations.size(); activ++) {
+					correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
 				}
-				dcdw += thetaO;
+				correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
+			}
+
+			correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0);
+			std::vector<float> derivatives(candidate->getWeights().size());
+			for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
+				float dcdw = 0.0;
+
+				for(std::size_t err = 0; err < errors.size(); err++) {
+					float thetaO = 0.0;
+					for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
+						(*candidate)(patternsForOutput[err].first);
+						float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
+						thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * candidate->weight(input);
+					}
+					dcdw += thetaO;
+				}
+				derivatives[input] = dcdw;
+			}
+
+			for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
+				candidate->weight(weightIndex) += derivatives[weightIndex] * 0.1;
+			}
+
+			if(firstStep || correlation > bestCorrelation) {
+				bestCorrelation = correlation;
+				bestCandidate=candidate;
+				firstStep=false;
 			}
-			derivatives[input]=dcdw;
 		}

-		for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
-			candidate->weight(weightIndex) += derivatives[weightIndex]*0.1;
-		}
-
-		if(correlation+0.0001 <= lastCorrelations) {
+		if(bestCorrelation <= lastCorrelation) {
 			iterationsWithoutIprovement++;
-		} else {
-			iterationsWithoutIprovement=0;
 		}

-		std::cout << correlation << "\n";
 	} while (iterations ++ < 200 && iterationsWithoutIprovement <3);
+	std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation <<  "\n";
+
+	return bestCandidate;
 }