From 8ae08c4b945f4052229ac115e6553888612d8ab2 Mon Sep 17 00:00:00 2001 From: Shin Date: Mon, 9 May 2016 21:52:39 +0200 Subject: [PATCH] cascade... --- .../CascadeCorrelation.h | 14 +- .../CascadeCorrelation.cpp | 189 +++++++++++------- 2 files changed, 122 insertions(+), 81 deletions(-) diff --git a/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h b/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h index 1b1bf5f..0bafc1a 100644 --- a/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h +++ b/include/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h @@ -41,7 +41,7 @@ namespace NeuralNetwork { addBestCandidate(network, candidate); if(_maxRandomOutputWeights) { - error = trainOutputsRandom(0, network, patterns); + error = trainOutputsRandom(_epoch, network, patterns); } else { error = trainOutputs(network, patterns); } @@ -88,7 +88,7 @@ namespace NeuralNetwork { } protected: - std::shared_ptr _activFunction = std::make_shared(-4.9); + std::shared_ptr _activFunction = std::make_shared(-0.8); float _minimalErrorStep = 0.00005; float _maxError; float _weightRange; @@ -98,9 +98,9 @@ namespace NeuralNetwork { std::size_t _maxRandomOutputWeights = 0; std::size_t _numberOfCandidates; std::size_t _maxOutpuLearningIterations = 1000; - std::size_t _maxOutpuLearningIterationsWithoutChange = 5; - std::size_t _maxCandidateIterations = 20; - std::size_t _maxCandidateIterationsWithoutChange = 5; + std::size_t _maxOutpuLearningIterationsWithoutChange = 100; + std::size_t _maxCandidateIterations = 4000; + std::size_t _maxCandidateIterationsWithoutChange = 15; std::mt19937 _generator; std::uniform_real_distribution<> _distribution; @@ -139,9 +139,9 @@ namespace NeuralNetwork { for(auto &weight: weights) { weight *= 0.9; } - weights[weights.size() - 1] = -candidate.second[outIndex] / weightPortion;//_distribution(_generator); outIndex++; n->setWeights(weights); + n->weight(n->getWeights().size() - 1) = -candidate.second[outIndex] / weightPortion; } } @@ -154,7 +154,7 @@ namespace NeuralNetwork { candidates.back()->setActivationFunction(*_activFunction.get()); for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) { - candidates.back()->weight(weightIndex) = _distribution(_generator); + candidates.back()->weight(weightIndex) = _distribution(_generator) * 3.0; } } return candidates; diff --git a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp index 9cdbb51..8aa4ab3 100644 --- a/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp +++ b/src/NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.cpp @@ -10,8 +10,8 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec Learning::BackPropagation learner(p); for(std::size_t neuron = 0; neuron < outputs; neuron++) { - p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights()); - p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction()); + p[1][neuron + 1].setWeights(network.getOutputNeurons()[neuron]->getWeights()); + p[1][neuron + 1].setActivationFunction(network.getOutputNeurons()[neuron]->getActivationFunction()); } std::vector patternsForOutput; @@ -46,16 +46,18 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec } while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange); + std::cout << "outputLearning: " << error << ", last: " << lastError << ", iters: " << iteration << "\n"; for(std::size_t neuron = 0; neuron < outputs; neuron++) { - network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights()); + network.getOutputNeurons()[neuron]->setWeights(p[1][neuron + 1].getWeights()); } return error; } + float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector &patterns) { std::size_t outputs = patterns[0].second.size(); - std::vector possibleOutputs; + std::vector possibleOutputs; { // first networks is special possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1)); FeedForward::Network &p = (*possibleOutputs.back()); @@ -68,17 +70,17 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network } - std::size_t generatedNets =0; + std::size_t generatedNets = 0; - if(step ==0 ) { - generatedNets=_maxRandomOutputWeights; - } else if(step % 15 ==0 ){ - generatedNets=_maxRandomOutputWeights; + if(step == 0) { + generatedNets = _maxRandomOutputWeights; + } else if(step % 15 == 0) { + generatedNets = _maxRandomOutputWeights; } else { - generatedNets=_maxRandomOutputWeights/step; + generatedNets = _maxRandomOutputWeights / step; } - for(std::size_t net =0;net < generatedNets;net++) { + for(std::size_t net = 0; net < generatedNets; net++) { possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1)); FeedForward::Network &p = (*possibleOutputs.back()); p.appendLayer(outputs); @@ -98,10 +100,10 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network std::size_t bestNetwork = 0; float bestScore = std::numeric_limits::max(); - std::size_t index=0; + std::size_t index = 0; for(auto &net : possibleOutputs) { - auto &p=*net; + auto &p = *net; Learning::BackPropagation learner(p); float lastError; @@ -130,8 +132,8 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network } while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange); if(error < bestScore) { - bestScore=error; - bestNetwork=index; + bestScore = error; + bestNetwork = index; } index++; } @@ -146,6 +148,7 @@ float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network return bestScore; } + std::pair, std::vector> CascadeCorrelation::trainCandidates(Cascade::Network &network, std::vector> &candidates, const std::vector &patterns) { @@ -159,107 +162,145 @@ std::pair, std::vector> CascadeCor std::vector> errors(patterns.size()); std::vector meanErrors(outputs); - float sumSquareError=0; + float sumSquareError = 0; + std::vector> errorsReal(patterns.size()); for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) { auto &pattern = patterns[patternNumber]; errors[patternNumber].resize(network.outputs()); + errorsReal[patternNumber].resize(network.outputs()); std::vector output = network.computeOutput(patterns[patternNumber].first); for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) { - //float error = pow(pattern.second[outputIndex] - output[outputIndex], 2); + float diff = output[outputIndex] - pattern.second[outputIndex]; + //float diff = pattern.second[outputIndex] - output[outputIndex]; + auto neuron = network.getOutputNeurons()[outputIndex]; - float error = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output())*(output[outputIndex] - pattern.second[outputIndex]); + float derivation = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output()); + float error = derivation * diff; errors[patternNumber][outputIndex] = error; + errorsReal[patternNumber][outputIndex] = error; meanErrors[outputIndex] += error; - sumSquareError+=error*error; + sumSquareError += error * error; } } std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n) { n /= patterns.size(); }); + struct CAND { + std::vector correlations = {}; + std::vector lastCorrelations = {}; + std::vector slopes = {}; + float sumVals = 0; + std::shared_ptr candidate = nullptr; + }; + + std::vector candidatesRegister(candidates.size()); + + for(std::size_t i = 0; i < candidates.size(); i++) { + candidatesRegister[i].candidate = candidates[i]; + candidatesRegister[i].correlations.resize(outputs); + candidatesRegister[i].lastCorrelations.resize(outputs); + candidatesRegister[i].slopes.resize(patternsForOutput[0].first.size()); + } + std::size_t iterations = 0; std::size_t iterationsWithoutIprovement = 0; float bestCorrelation = 0; float lastCorrelation = 0; std::shared_ptr bestCandidate = nullptr; - std::vector bestCorrelations(errors[0].size()); + std::vector bestCorrelations(outputs); + + for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) { + for(auto &candidateStruct : candidatesRegister) { + float value = (*candidateStruct.candidate)(patternsForOutput[patternIndex].first); + candidateStruct.sumVals += value; + for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) { + candidateStruct.correlations[outputIndex] -= value * meanErrors[outputIndex]; + } + } + } + + for(auto &candidate : candidatesRegister) { + float score = 0.0; + float aveValue = candidate.sumVals / patterns.size(); + for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) { + float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError; + candidate.lastCorrelations[outputIndex] = correlation; + candidate.correlations[outputIndex] = 0; + candidate.sumVals = 0; + score += fabs(correlation); + } + } do { lastCorrelation = bestCorrelation; - bool firstStep = true; - for(auto &candidate : candidates) { - float correlation; - std::vector activations; - std::vector correlations(errors[0].size()); - std::vector correlationSigns(errors[0].size()); + /*cascor_cand_epoch*/ + for(std::size_t patternIndex = 0; patternIndex < patterns.size(); patternIndex++) { + for(auto &candidateStruct : candidatesRegister) { + auto candidate = candidateStruct.candidate; + float change = 0; + float activation = (*candidate)(patternsForOutput[patternIndex].first); + candidateStruct.sumVals += activation; - float activationSum=0.0; - for(auto &pattern:patternsForOutput) { - activations.push_back((*candidate)(pattern.first)); - activationSum+=activations.back(); - } + float derivation = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError; + for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) { + float error = errors[patternIndex][outputIndex]; - activationSum/=patternsForOutput.size(); + float direction = candidateStruct.lastCorrelations[outputIndex] < 0.0 ? -1.0 : 1.0; - for(std::size_t err = 0; err < meanErrors.size(); err++) { - for(std::size_t activ = 0; activ < activations.size(); activ++) { - correlations[err] += (activations[activ]*errors[activ][err] - activationSum * meanErrors[err]); + change -= direction * derivation * (error - meanErrors[outputIndex]); + + candidateStruct.correlations[outputIndex] -= error * activation; } - correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0; - } - correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); }); - - if(std::isnan(correlation)) { - correlation=-5000; - } - - std::vector derivatives(candidate->getWeights().size()); - for(std::size_t input = 0; input < candidate->getWeights().size(); input++) { - float dcdw = 0.0; - - for(std::size_t err = 0; err < errors.size(); err++) { - float thetaO = 0.0; - for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) { - (*candidate)(patternsForOutput[err].first); - float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError; - if (std::isnan(derivative)) { - //std::cout << "isNan\n"; - derivative =1; - } - thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * patternsForOutput[err].first[input]; - } - dcdw += thetaO; + for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) { + candidateStruct.slopes[i] += change * patternsForOutput[patternIndex].first[i]; } - if(std::isnan(dcdw)) { - dcdw=0.1; - } - derivatives[input] = dcdw; + } + } + + /*adjust ci_weights*/ + for(auto &candidateStruct : candidatesRegister) { + auto candidate = candidateStruct.candidate; + for(std::size_t i = 0; i < candidateStruct.slopes.size(); i++) { + candidate->weight(i) += candidateStruct.slopes[i] * 2; + candidateStruct.slopes[i] = 0.0; + } + } + + /* adjust correlations*/ + bestCorrelation = 0; + bool step = true; + for(auto &candidate : candidatesRegister) { + float score = 0.0; + float aveValue = candidate.sumVals / patterns.size(); + for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) { + float correlation = (candidate.correlations[outputIndex] - aveValue * meanErrors[outputIndex]) / sumSquareError; + candidate.lastCorrelations[outputIndex] = correlation; + candidate.correlations[outputIndex] = 0; + candidate.sumVals = 0; + score += fabs(correlation); } - for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) { - candidate->weight(weightIndex) += derivatives[weightIndex] * 0.7; - } - - if(firstStep || correlation > bestCorrelation) { - bestCorrelation = correlation; - bestCandidate = candidate; - std::swap(bestCorrelations, correlations); - firstStep = false; + if(score > bestCorrelation || step) { + bestCandidate = candidate.candidate; + bestCorrelation = score; + bestCorrelations = candidate.lastCorrelations; + step = false; } } if(bestCorrelation <= lastCorrelation) { iterationsWithoutIprovement++; } - +// std::cout << "sub iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n"; } while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange); - //std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n"; + std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n"; return {bestCandidate, bestCorrelations}; } \ No newline at end of file