cascade...

This commit is contained in:
2016-05-09 21:52:39 +02:00
parent 5a7f10ba81
commit 8ae08c4b94
2 changed files with 122 additions and 81 deletions

View File

@@ -41,7 +41,7 @@ namespace NeuralNetwork {
addBestCandidate(network, candidate); addBestCandidate(network, candidate);
if(_maxRandomOutputWeights) { if(_maxRandomOutputWeights) {
error = trainOutputsRandom(0, network, patterns); error = trainOutputsRandom(_epoch, network, patterns);
} else { } else {
error = trainOutputs(network, patterns); error = trainOutputs(network, patterns);
} }
@@ -88,7 +88,7 @@ namespace NeuralNetwork {
} }
protected: protected:
std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-4.9); std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-0.8);
float _minimalErrorStep = 0.00005; float _minimalErrorStep = 0.00005;
float _maxError; float _maxError;
float _weightRange; float _weightRange;
@@ -98,9 +98,9 @@ namespace NeuralNetwork {
std::size_t _maxRandomOutputWeights = 0; std::size_t _maxRandomOutputWeights = 0;
std::size_t _numberOfCandidates; std::size_t _numberOfCandidates;
std::size_t _maxOutpuLearningIterations = 1000; std::size_t _maxOutpuLearningIterations = 1000;
std::size_t _maxOutpuLearningIterationsWithoutChange = 5; std::size_t _maxOutpuLearningIterationsWithoutChange = 100;
std::size_t _maxCandidateIterations = 20; std::size_t _maxCandidateIterations = 4000;
std::size_t _maxCandidateIterationsWithoutChange = 5; std::size_t _maxCandidateIterationsWithoutChange = 15;
std::mt19937 _generator; std::mt19937 _generator;
std::uniform_real_distribution<> _distribution; std::uniform_real_distribution<> _distribution;
@@ -139,9 +139,9 @@ namespace NeuralNetwork {
for(auto &weight: weights) { for(auto &weight: weights) {
weight *= 0.9; weight *= 0.9;
} }
weights[weights.size() - 1] = -candidate.second[outIndex] / weightPortion;//_distribution(_generator);
outIndex++; outIndex++;
n->setWeights(weights); n->setWeights(weights);
n->weight(n->getWeights().size() - 1) = -candidate.second[outIndex] / weightPortion;
} }
} }
@@ -154,7 +154,7 @@ namespace NeuralNetwork {
candidates.back()->setActivationFunction(*_activFunction.get()); candidates.back()->setActivationFunction(*_activFunction.get());
for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) { for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
candidates.back()->weight(weightIndex) = _distribution(_generator); candidates.back()->weight(weightIndex) = _distribution(_generator) * 3.0;
} }
} }
return candidates; return candidates;

View File

@@ -10,8 +10,8 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
Learning::BackPropagation learner(p); Learning::BackPropagation learner(p);
for(std::size_t neuron = 0; neuron < outputs; neuron++) { for(std::size_t neuron = 0; neuron < outputs; neuron++) {
p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights()); p[1][neuron + 1].setWeights(network.getOutputNeurons()[neuron]->getWeights());
p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction()); p[1][neuron + 1].setActivationFunction(network.getOutputNeurons()[neuron]->getActivationFunction());
} }
std::vector<TrainingPattern> patternsForOutput; std::vector<TrainingPattern> patternsForOutput;
@@ -46,16 +46,18 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
} }
while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange); while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
std::cout << "outputLearning: " << error << ", last: " << lastError << ", iters: " << iteration << "\n";
for(std::size_t neuron = 0; neuron < outputs; neuron++) { for(std::size_t neuron = 0; neuron < outputs; neuron++) {
network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights()); network.getOutputNeurons()[neuron]->setWeights(p[1][neuron + 1].getWeights());
} }
return error; return error;
} }
float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) { float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size(); std::size_t outputs = patterns[0].second.size();
std::vector<FeedForward::Network*> possibleOutputs; std::vector<FeedForward::Network *> possibleOutputs;
{ // first networks is special { // first networks is special
possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1)); possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
FeedForward::Network &p = (*possibleOutputs.back()); FeedForward::Network &p = (*possibleOutputs.back());
@@ -68,17 +70,17 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
} }
std::size_t generatedNets =0; std::size_t generatedNets = 0;
if(step ==0 ) { if(step == 0) {
generatedNets=_maxRandomOutputWeights; generatedNets = _maxRandomOutputWeights;
} else if(step % 15 ==0 ){ } else if(step % 15 == 0) {
generatedNets=_maxRandomOutputWeights; generatedNets = _maxRandomOutputWeights;
} else { } else {
generatedNets=_maxRandomOutputWeights/step; generatedNets = _maxRandomOutputWeights / step;
} }
for(std::size_t net =0;net < generatedNets;net++) { for(std::size_t net = 0; net < generatedNets; net++) {
possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1)); possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
FeedForward::Network &p = (*possibleOutputs.back()); FeedForward::Network &p = (*possibleOutputs.back());
p.appendLayer(outputs); p.appendLayer(outputs);
@@ -98,10 +100,10 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
std::size_t bestNetwork = 0; std::size_t bestNetwork = 0;
float bestScore = std::numeric_limits<float>::max(); float bestScore = std::numeric_limits<float>::max();
std::size_t index=0; std::size_t index = 0;
for(auto &net : possibleOutputs) { for(auto &net : possibleOutputs) {
auto &p=*net; auto &p = *net;
Learning::BackPropagation learner(p); Learning::BackPropagation learner(p);
float lastError; float lastError;
@@ -130,8 +132,8 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
} }
while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange); while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
if(error < bestScore) { if(error < bestScore) {
bestScore=error; bestScore = error;
bestNetwork=index; bestNetwork = index;
} }
index++; index++;
} }
@@ -146,6 +148,7 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network
return bestScore; return bestScore;
} }
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network, std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network,
std::vector<std::shared_ptr<Neuron>> &candidates, std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) { const std::vector<TrainingPattern> &patterns) {
@@ -159,107 +162,145 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
std::vector<std::vector<float>> errors(patterns.size()); std::vector<std::vector<float>> errors(patterns.size());
std::vector<float> meanErrors(outputs); std::vector<float> meanErrors(outputs);
float sumSquareError=0; float sumSquareError = 0;
std::vector<std::vector<float>> errorsReal(patterns.size());
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) { for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
auto &pattern = patterns[patternNumber]; auto &pattern = patterns[patternNumber];
errors[patternNumber].resize(network.outputs()); errors[patternNumber].resize(network.outputs());
errorsReal[patternNumber].resize(network.outputs());
std::vector<float> output = network.computeOutput(patterns[patternNumber].first); std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) { for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
//float error = pow(pattern.second[outputIndex] - output[outputIndex], 2); float diff = output[outputIndex] - pattern.second[outputIndex];
//float diff = pattern.second[outputIndex] - output[outputIndex];
auto neuron = network.getOutputNeurons()[outputIndex]; auto neuron = network.getOutputNeurons()[outputIndex];
float error = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output())*(output[outputIndex] - pattern.second[outputIndex]); float derivation = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output());
float error = derivation * diff;
errors[patternNumber][outputIndex] = error; errors[patternNumber][outputIndex] = error;
errorsReal[patternNumber][outputIndex] = error;
meanErrors[outputIndex] += error; meanErrors[outputIndex] += error;
sumSquareError+=error*error; sumSquareError += error * error;
} }
} }
std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n) { n /= patterns.size(); }); std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n) { n /= patterns.size(); });
struct CAND {
std::vector<float> correlations = {};
std::vector<float> lastCorrelations = {};
std::vector<float> slopes = {};
float sumVals = 0;
std::shared_ptr<Neuron> candidate = nullptr;
};
std::vector<CAND> candidatesRegister(candidates.size());
for(std::size_t i = 0; i < candidates.size(); i++) {
candidatesRegister[i].candidate = candidates[i];
candidatesRegister[i].correlations.resize(outputs);
candidatesRegister[i].lastCorrelations.resize(outputs);
candidatesRegister[i].slopes.resize(patternsForOutput[0].first.size());
}
std::size_t iterations = 0; std::size_t iterations = 0;
std::size_t iterationsWithoutIprovement = 0; std::size_t iterationsWithoutIprovement = 0;
float bestCorrelation = 0; float bestCorrelation = 0;
float lastCorrelation = 0; float lastCorrelation = 0;
std::shared_ptr<Neuron> bestCandidate = nullptr; std::shared_ptr<Neuron> bestCandidate = nullptr;
std::vector<float> bestCorrelations(errors[0].size()); std::vector<float> bestCorrelations(outputs);
for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) {
for(auto &candidateStruct : candidatesRegister) {
float value = (*candidateStruct.candidate)(patternsForOutput[patternIndex].first);
candidateStruct.sumVals += value;
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
candidateStruct.correlations[outputIndex] -= value * meanErrors[outputIndex];
}
}
}
for(auto &candidate : candidatesRegister) {
float score = 0.0;
float aveValue = candidate.sumVals / patterns.size();
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError;
candidate.lastCorrelations[outputIndex] = correlation;
candidate.correlations[outputIndex] = 0;
candidate.sumVals = 0;
score += fabs(correlation);
}
}
do { do {
lastCorrelation = bestCorrelation; lastCorrelation = bestCorrelation;
bool firstStep = true;
for(auto &candidate : candidates) {
float correlation;
std::vector<float> activations; /*cascor_cand_epoch*/
std::vector<float> correlations(errors[0].size()); for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) {
std::vector<float> correlationSigns(errors[0].size()); for(auto &candidateStruct : candidatesRegister) {
auto candidate = candidateStruct.candidate;
float change = 0;
float activation = (*candidate)(patternsForOutput[patternIndex].first);
candidateStruct.sumVals += activation;
float activationSum=0.0; float derivation = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
for(auto &pattern:patternsForOutput) { for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
activations.push_back((*candidate)(pattern.first)); float error = errors[patternIndex][outputIndex];
activationSum+=activations.back();
}
activationSum/=patternsForOutput.size(); float direction = candidateStruct.lastCorrelations[outputIndex] < 0.0 ? -1.0 : 1.0;
for(std::size_t err = 0; err < meanErrors.size(); err++) { change -= direction * derivation * (error - meanErrors[outputIndex]);
for(std::size_t activ = 0; activ < activations.size(); activ++) {
correlations[err] += (activations[activ]*errors[activ][err] - activationSum * meanErrors[err]); candidateStruct.correlations[outputIndex] -= error * activation;
} }
correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
}
correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); }); for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) {
candidateStruct.slopes[i] += change * patternsForOutput[patternIndex].first[i];
if(std::isnan(correlation)) {
correlation=-5000;
}
std::vector<float> derivatives(candidate->getWeights().size());
for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
float dcdw = 0.0;
for(std::size_t err = 0; err < errors.size(); err++) {
float thetaO = 0.0;
for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
(*candidate)(patternsForOutput[err].first);
float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
if (std::isnan(derivative)) {
//std::cout << "isNan\n";
derivative =1;
}
thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * patternsForOutput[err].first[input];
}
dcdw += thetaO;
} }
if(std::isnan(dcdw)) { }
dcdw=0.1; }
}
derivatives[input] = dcdw; /*adjust ci_weights*/
for(auto &candidateStruct : candidatesRegister) {
auto candidate = candidateStruct.candidate;
for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) {
candidate->weight(i) += candidateStruct.slopes[i] * 2;
candidateStruct.slopes[i] = 0.0;
}
}
/* adjust correlations*/
bestCorrelation = 0;
bool step = true;
for(auto &candidate : candidatesRegister) {
float score = 0.0;
float aveValue = candidate.sumVals / patterns.size();
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError;
candidate.lastCorrelations[outputIndex] = correlation;
candidate.correlations[outputIndex] = 0;
candidate.sumVals = 0;
score += fabs(correlation);
} }
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) { if(score > bestCorrelation || step) {
candidate->weight(weightIndex) += derivatives[weightIndex] * 0.7; bestCandidate = candidate.candidate;
} bestCorrelation = score;
bestCorrelations = candidate.lastCorrelations;
if(firstStep || correlation > bestCorrelation) { step = false;
bestCorrelation = correlation;
bestCandidate = candidate;
std::swap(bestCorrelations, correlations);
firstStep = false;
} }
} }
if(bestCorrelation <= lastCorrelation) { if(bestCorrelation <= lastCorrelation) {
iterationsWithoutIprovement++; iterationsWithoutIprovement++;
} }
// std::cout << "sub iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
} }
while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange); while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
//std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n"; std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
return {bestCandidate, bestCorrelations}; return {bestCandidate, bestCorrelations};
} }