cascade correlation implementation

2016-05-08 12:53:00 +02:00
parent f6cfccb0e7
commit 383bfa994b
2 changed files with 162 additions and 20 deletions


@@ -7,14 +7,13 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
FeedForward::Network p(network.getNeuronSize() - outputs - 1);
p.appendLayer(outputs);
- Learning::QuickPropagation learner(p);
+ Learning::BackPropagation learner(p);
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
}
- //std::cout << p.stringify() << "\n";
std::vector<TrainingPattern> patternsForOutput;
for(auto &pattern:patterns) {
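
This hunk switches the trainer for the output layer from QuickPropagation to BackPropagation. For context, trainOutputs copies the cascade network's output weights into a temporary single-layer FeedForward::Network whose inputs are the frozen hidden-unit activations, trains that small network, and copies the weights back. A minimal sketch of the same idea, assuming a logistic activation and illustrative names rather than the project's classes:

#include <cmath>
#include <cstddef>
#include <vector>

// Sketch: one backpropagation pass for a single sigmoid output unit on
// cached hidden activations (the delta rule, which backpropagation
// reduces to when there is only one layer). All names are illustrative.
float sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

void trainOutputUnit(std::vector<float> &w,                      // weights, bias at w[0]
                     const std::vector<std::vector<float>> &act, // hidden activations per pattern
                     const std::vector<float> &target,           // desired output per pattern
                     float rate) {
    for (std::size_t p = 0; p < act.size(); ++p) {
        float net = w[0];
        for (std::size_t i = 0; i < act[p].size(); ++i)
            net += w[i + 1] * act[p][i];
        float out = sigmoid(net);
        float delta = (target[p] - out) * out * (1.0f - out); // error times f'(net)
        w[0] += rate * delta;
        for (std::size_t i = 0; i < act[p].size(); ++i)
            w[i + 1] += rate * delta * act[p][i];
    }
}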
@@ -45,8 +44,7 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
iterWithoutImporvement = 0;
}
}
- while(iteration++ < 1000 && iterWithoutImporvement < 400);
- std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError - error) << "\n";
+ while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
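
Here the hardcoded stopping constants (1000 iterations, at most 400 without improvement) become the configurable members _maxOutpuLearningIterations and _maxOutpuLearningIterationsWithoutChange, and the per-call debug print is removed. The stopping rule itself, sketched generically (trainEpoch and the parameter names are stand-ins, not the project's API):

#include <cmath>
#include <cstddef>
#include <limits>

// Generic form of the stopping rule above: cap total epochs, and stop
// early once the error has moved by less than minStep for maxFlat
// consecutive epochs. trainEpoch stands in for one learner.teach pass
// over all patterns followed by an SSE evaluation.
template <typename TrainEpoch>
float trainWithEarlyStop(TrainEpoch trainEpoch, std::size_t maxIter,
                         std::size_t maxFlat, float minStep) {
    float error = std::numeric_limits<float>::max();
    std::size_t iteration = 0, flat = 0;
    do {
        float last = error;
        error = trainEpoch();
        flat = (std::fabs(last - error) < minStep) ? flat + 1 : 0;
    } while (iteration++ < maxIter && flat < maxFlat);
    return error;
}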
@@ -54,6 +52,100 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
return error;
}
+ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
+ std::size_t outputs = patterns[0].second.size();
+ std::vector<FeedForward::Network*> possibleOutputs;
+ { // first networks is special
+ possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
+ FeedForward::Network &p = (*possibleOutputs.back());
+ p.appendLayer(outputs);
+ for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+ p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
+ p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
+ }
+ }
+ std::size_t generatedNets =0;
+ if(step ==0 ) {
+ generatedNets=_maxRandomOutputWeights;
+ } else if(step % 15 ==0 ){
+ generatedNets=_maxRandomOutputWeights;
+ } else {
+ generatedNets=_maxRandomOutputWeights/step;
+ }
+ for(std::size_t net =0;net < generatedNets;net++) {
+ possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
+ FeedForward::Network &p = (*possibleOutputs.back());
+ p.appendLayer(outputs);
+ for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+ for(std::size_t weight = 0; weight < network.getNeuronSize() - outputs - 1; weight++) {
+ p[1][neuron + 1].weight(weight) = _distribution(_generator);
+ }
+ p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
+ }
+ }
+ std::vector<TrainingPattern> patternsForOutput;
+ for(auto &pattern:patterns) {
+ patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
+ }
+ std::size_t bestNetwork = 0;
+ float bestScore = std::numeric_limits<float>::max();
+ std::size_t index=0;
+ for(auto &net : possibleOutputs) {
+ auto &p=*net;
+ Learning::BackPropagation learner(p);
+ float lastError;
+ float error = std::numeric_limits<float>::max();
+ std::size_t iteration = 0;
+ std::size_t iterWithoutImporvement = 0;
+ do {
+ lastError = error;
+ for(auto &pattern:patternsForOutput) {
+ learner.teach({pattern.first.begin() + 1, pattern.first.end()}, pattern.second);
+ }
+ error = 0;
+ for(auto &pattern:patternsForOutput) {
+ std::vector<float> output = p.computeOutput({pattern.first.begin() + 1, pattern.first.end()});
+ for(std::size_t outputIndex = 0; outputIndex < output.size(); outputIndex++) {
+ error += pow(output[outputIndex] - pattern.second[outputIndex], 2);
+ }
+ }
+ if(fabs(lastError - error) < _minimalErrorStep) {
+ iterWithoutImporvement++;
+ } else {
+ iterWithoutImporvement = 0;
+ }
+ }
+ while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
+ if(error < bestScore) {
+ bestScore=error;
+ bestNetwork=index;
+ }
+ index++;
+ }
+ FeedForward::Network &p = *possibleOutputs[bestNetwork];
+ std::cout << "network: " << bestNetwork << ", error: " << bestScore << "\n";
+ for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+ network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
+ }
+ return bestScore;
+ }
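
The new trainOutputsRandom keeps the current output weights as candidate zero, then adds a batch of randomly re-initialized copies: a full _maxRandomOutputWeights batch at step 0 and at every 15th step (the step == 0 branch is already covered by step % 15 == 0), otherwise _maxRandomOutputWeights/step, so the batch shrinks as the cascade grows. Every candidate is trained with the same early-stopped backpropagation loop and the winner's weights are installed. One caveat: possibleOutputs holds raw pointers from new that are never deleted, so each call leaks the candidate networks. A sketch of the same best-of-N selection with owning pointers; Net and the score callable are placeholders for the project's network type and training loop:

#include <cstddef>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

struct Net { /* placeholder for FeedForward::Network */ };

// Train and score every candidate, return the best one; unique_ptr
// frees the losing networks when the vector goes out of scope.
template <typename Score>
std::unique_ptr<Net> pickBest(std::vector<std::unique_ptr<Net>> candidates,
                              Score score) { // score: train, then return final SSE
    std::size_t best = 0;
    float bestScore = std::numeric_limits<float>::max();
    for (std::size_t i = 0; i < candidates.size(); ++i) {
        float s = score(*candidates[i]);
        if (s < bestScore) { bestScore = s; best = i; }
    }
    return std::move(candidates[best]);
}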
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network,
std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) {
@@ -75,10 +167,13 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
- float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
+ //float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
+ auto neuron = network.getOutputNeurons()[outputIndex];
+ float error = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output())*(output[outputIndex] - pattern.second[outputIndex]);
errors[patternNumber][outputIndex] = error;
meanErrors[outputIndex] += error;
- sumSquareError+=error;
+ sumSquareError+=error*error;
}
}
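
The residual handed to the candidate stage changes meaning in this hunk: instead of the squared error pow(t - o, 2), each output now contributes the back-propagated delta f'(net) * (o - t), and sumSquareError accumulates its square rather than the raw value. A short sketch of the new term, assuming a logistic output (the helper names are hypothetical):

#include <cstddef>
#include <vector>

// Delta term for a logistic output: f'(net) expressed via the output,
// o * (1 - o), times the signed difference (o - t).
float outputDelta(float out, float target) {
    return out * (1.0f - out) * (out - target);
}

// Accumulate the squared deltas, matching sumSquareError += error*error.
float sumSquaredDeltas(const std::vector<float> &out,
                       const std::vector<float> &target) {
    float sum = 0.0f;
    for (std::size_t i = 0; i < out.size(); ++i) {
        float d = outputDelta(out[i], target[i]);
        sum += d * d;
    }
    return sum;
}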
@@ -102,19 +197,27 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
std::vector<float> correlations(errors[0].size());
std::vector<float> correlationSigns(errors[0].size());
+ float activationSum=0.0;
for(auto &pattern:patternsForOutput) {
activations.push_back((*candidate)(pattern.first));
+ activationSum+=activations.back();
}
+ activationSum/=patternsForOutput.size();
for(std::size_t err = 0; err < meanErrors.size(); err++) {
for(std::size_t activ = 0; activ < activations.size(); activ++) {
- correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
+ correlations[err] += (activations[activ]*errors[activ][err] - activationSum * meanErrors[err]);
}
correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
}
correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); });
+ if(std::isnan(correlation)) {
+ correlation=-5000;
+ }
std::vector<float> derivatives(candidate->getWeights().size());
for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
float dcdw = 0.0;
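
In the hunk above, the per-pattern correlation term changes from V_p * (E_p - meanE) to V_p * E_p - meanV * meanE, with the new activationSum supplying the mean activation meanV. Summed over a full pass, both forms equal the centered covariance sum (V_p - meanV) * (E_p - meanE) that appears in Fahlman's candidate score S = sum over outputs of |covariance|, so the rewrite should only affect rounding; the added NaN clamp to -5000 then keeps a diverged candidate from ever being selected. A small self-contained check of that identity:

#include <cassert>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <vector>

// Verify: sum of (V-meanV)*(E-meanE), sum of V*(E-meanE), and
// sum of (V*E - meanV*meanE) all agree over a full pass.
int main() {
    std::vector<float> V = {0.2f, -0.7f, 0.5f, 0.9f}; // candidate activations
    std::vector<float> E = {0.1f, 0.4f, -0.3f, 0.6f}; // residuals for one output
    float n = static_cast<float>(V.size());
    float meanV = std::accumulate(V.begin(), V.end(), 0.0f) / n;
    float meanE = std::accumulate(E.begin(), E.end(), 0.0f) / n;
    float centered = 0.0f, oldForm = 0.0f, newForm = 0.0f;
    for (std::size_t p = 0; p < V.size(); ++p) {
        centered += (V[p] - meanV) * (E[p] - meanE);
        oldForm  += V[p] * (E[p] - meanE);
        newForm  += V[p] * E[p] - meanV * meanE;
    }
    assert(std::fabs(centered - oldForm) < 1e-5f);
    assert(std::fabs(centered - newForm) < 1e-5f);
    return 0;
}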
@@ -123,11 +226,18 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
float thetaO = 0.0;
for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
(*candidate)(patternsForOutput[err].first);
- float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output())/sumSquareError;
+ float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
+ if (std::isnan(derivative)) {
+ //std::cout << "isNan\n";
+ derivative =1;
+ }
thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * patternsForOutput[err].first[input];
}
dcdw += thetaO;
}
+ if(std::isnan(dcdw)) {
+ dcdw=0.1;
+ }
derivatives[input] = dcdw;
}
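
The derivative loop above appears to follow Fahlman and Lebiere's gradient of the correlation score, dS/dw_i = sum over outputs o of sign_o * sum over patterns p of (E_po - meanE_o) * f'(net_p) * I_pi, scaled here by 1/sumSquareError, with the new NaN guards pinning a diverged derivative to 1 and a diverged gradient to 0.1. A compact sketch of that gradient with illustrative names:

#include <cstddef>
#include <vector>

// dS/dw_i = sum over patterns p and outputs o of
//   sign[o] * (E[p][o] - meanE[o]) * (f'(net_p) / sumSquareError) * input[p][i]
float correlationGradient(std::size_t i,
                          const std::vector<std::vector<float>> &input,  // candidate inputs per pattern
                          const std::vector<std::vector<float>> &E,      // residuals per pattern/output
                          const std::vector<float> &meanE,               // mean residual per output
                          const std::vector<float> &sign,                // sign of each output's correlation
                          const std::vector<float> &fprime,              // f'(net) per pattern
                          float sumSquareError) {
    float dcdw = 0.0f;
    for (std::size_t p = 0; p < input.size(); ++p)
        for (std::size_t o = 0; o < meanE.size(); ++o)
            dcdw += sign[o] * (E[p][o] - meanE[o]) * (fprime[p] / sumSquareError) * input[p][i];
    return dcdw;
}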
@@ -148,8 +258,8 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
}
}
- while(iterations++ < 200 && iterationsWithoutIprovement < 300);
- std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
+ while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
+ //std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
return {bestCandidate, bestCorrelations};
}