Cascade correlation implementation
@@ -10,9 +10,10 @@
 namespace NeuralNetwork {
 namespace ConstructiveAlgorihtms {
 class CascadeCorrelation {
-    typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
 public:
-    CascadeCorrelation(std::size_t numberOfCandidate = 20, float maxError = 0.7) :
+    typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
+
+    CascadeCorrelation(std::size_t numberOfCandidate = 18, float maxError = 0.7) :
         _maxError(maxError), _weightRange(0.3), _numberOfCandidates(numberOfCandidate), _generator(rand()), _distribution() {
         setWeightRange(_weightRange);
     }
@@ -25,19 +26,27 @@ namespace NeuralNetwork {
 
         network.randomizeWeights();
 
-        std::size_t step = 0;
-        float error = trainOutputs(network, patterns);
-        while(step++ < _maxHiddenUnits && error > _maxError) {
+        _epoch = 0;
+        float error;
+        if(_useProbabilisticOutputWeightSearch) {
+            error = trainOutputsRandom(0, network, patterns);
+        } else {
+            error = trainOutputs(network, patterns);
+        }
+        while(_epoch++ < _maxHiddenUnits && error > _maxError) {
             std::vector<std::shared_ptr<Neuron>> candidates = createCandidates(network.getNeuronSize() - outputs);
 
             std::pair<std::shared_ptr<Neuron>, std::vector<float>> candidate = trainCandidates(network, candidates, patterns);
 
             addBestCandidate(network, candidate);
 
-            error = trainOutputs(network, patterns);
+            if(_useProbabilisticOutputWeightSearch) {
+                error = trainOutputsRandom(0, network, patterns);
+            } else {
+                error = trainOutputs(network, patterns);
+            }
         }
 
-        std::cout << step << ": " << error << "\n";
 
         return network;
     }
 
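The loop above is the classic two-phase cascade-correlation cycle of Fahlman and Lebiere: first train only the output weights, then, while the error target is missed and the hidden-unit budget remains, train candidate units against the residual error, freeze the best one into the network, and retrain the outputs. A minimal, self-contained sketch of that control flow; the function names here are placeholders, not this repository's API:

```cpp
#include <cstdio>

// Stand-ins for the real training phases; each name is a placeholder.
static float trainOutputs() { static float e = 1.0f; return e *= 0.6f; }
static void growHiddenUnit() { /* train candidates, install the best */ }

int main() {
    const float maxError = 0.1f;    // plays the role of _maxError
    const unsigned maxHidden = 20;  // plays the role of _maxHiddenUnits
    unsigned hidden = 0;

    float error = trainOutputs();   // phase 1: output weights only
    while(hidden++ < maxHidden && error > maxError) {
        growHiddenUnit();           // phase 2: add one frozen hidden unit
        error = trainOutputs();     // retrain the outputs with the new unit
    }
    std::printf("hidden units: %u, error: %f\n", hidden, error);
    return 0;
}
```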
@@ -66,13 +75,34 @@ namespace NeuralNetwork {
         _activFunction = std::shared_ptr<ActivationFunction::ActivationFunction>(function.clone());
     }
 
+    void setProbabilisticOutputWeightSearch(bool status) {
+        _useProbabilisticOutputWeightSearch = status;
+    }
+
+    bool getProbabilisticOutputWeightSearch() const {
+        return _useProbabilisticOutputWeightSearch;
+    }
+
+    std::size_t getEpochs() const {
+        return _epoch;
+    }
+
 protected:
     std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-4.9);
     float _minimalErrorStep = 0.00005;
     float _maxError;
     float _weightRange;
+    bool _useProbabilisticOutputWeightSearch = false;
+
+    std::size_t _epoch = 0;
     std::size_t _maxHiddenUnits = 20;
+    std::size_t _maxRandomOutputWeights = 20;
     std::size_t _numberOfCandidates;
+    std::size_t _maxOutpuLearningIterations = 1000;
+    std::size_t _maxOutpuLearningIterationsWithoutChange = 5;
+    std::size_t _maxCandidateIterations = 20;
+    std::size_t _maxCandidateIterationsWithoutChange = 5;
 
     std::mt19937 _generator;
     std::uniform_real_distribution<> _distribution;
 private:
@@ -93,6 +123,8 @@ namespace NeuralNetwork {
 
     float trainOutputs(Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
 
+    float trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
+
     std::pair<std::shared_ptr<Neuron>, std::vector<float>> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates,
                                                                            const std::vector<TrainingPattern> &patterns);
 
@@ -106,9 +138,9 @@ namespace NeuralNetwork {
         for(auto &n :network.getOutputNeurons()) {
             auto weights = n->getWeights();
             for(auto &weight: weights) {
-                weight *= 0.7;
+                weight *= 0.9;
             }
-            weights[weights.size() - 1] = -candidate.second[outIndex] * weightPortion;//_distribution(_generator);
+            weights[weights.size() - 1] = -candidate.second[outIndex] / weightPortion;//_distribution(_generator);
             outIndex++;
             n->setWeights(weights);
         }
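When the winning candidate is installed, the existing output weights are damped (multiplied by 0.9 here) so retraining can redistribute responsibility onto the new unit, and the new unit's connection to each output is seeded with the negated, scaled correlation: a unit whose activation rises with an output's residual error should feed that output negatively, so it pushes the error down. A toy illustration with made-up numbers; weightPortion is only a scale factor here:

```cpp
#include <cstdio>
#include <vector>

int main() {
    // Existing output weights are damped so retraining can redistribute
    // responsibility onto the new unit (the commit uses 0.9).
    std::vector<float> weights = {0.5f, -1.2f, 0.8f};
    for(float &w : weights) w *= 0.9f;

    // The new connection starts at the negated, scaled correlation: a unit
    // whose activation rises with the residual error should push against it.
    float correlationWithError = 0.4f;  // illustrative value
    float weightPortion = 2.0f;         // illustrative scale factor
    weights.push_back(-correlationWithError / weightPortion);

    for(float w : weights) std::printf("%f\n", w);
    return 0;
}
```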
@@ -7,14 +7,13 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
 
     FeedForward::Network p(network.getNeuronSize() - outputs - 1);
     p.appendLayer(outputs);
-    Learning::QuickPropagation learner(p);
+    Learning::BackPropagation learner(p);
 
     for(std::size_t neuron = 0; neuron < outputs; neuron++) {
         p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
         p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
     }
 
-    //std::cout << p.stringify() << "\n";
     std::vector<TrainingPattern> patternsForOutput;
 
     for(auto &pattern:patterns) {
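trainOutputs trains the output layer in isolation: it copies the output neurons into a one-layer FeedForward::Network and builds patternsForOutput from the activations of the frozen inner units. Because installed units never change during output training, those activations can be computed once per pattern and reused every epoch. A hedged sketch of the idea; Pattern and cacheInnerActivations are hypothetical stand-ins, not the repository's names:

```cpp
#include <cstdio>
#include <functional>
#include <vector>

// Hypothetical stand-in for a training pair; not the repository's type.
struct Pattern { std::vector<float> input, target; };

// Because installed hidden units are frozen, each pattern's inner
// activations stay constant while the output weights train, so they can
// be computed once and reused every epoch (what patternsForOutput does).
static std::vector<Pattern> cacheInnerActivations(
        const std::vector<Pattern> &patterns,
        const std::function<std::vector<float>(const std::vector<float>&)> &inner) {
    std::vector<Pattern> cached;
    cached.reserve(patterns.size());
    for(const Pattern &p : patterns)
        cached.push_back({inner(p.input), p.target});  // frozen features
    return cached;
}

int main() {
    std::vector<Pattern> patterns = {{{0.f, 1.f}, {1.f}}, {{1.f, 0.f}, {1.f}}};
    auto cached = cacheInnerActivations(patterns,
        [](const std::vector<float> &x) { return x; });  // identity stand-in
    std::printf("cached %zu patterns\n", cached.size());
    return 0;
}
```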
@@ -45,8 +44,7 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
                 iterWithoutImporvement = 0;
             }
         }
-        while(iteration++ < 1000 && iterWithoutImporvement < 400);
-        std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError - error) << "\n";
+        while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
 
         for(std::size_t neuron = 0; neuron < outputs; neuron++) {
             network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
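The magic numbers 1000 and 400 become named hyperparameters, and the second condition is effectively an early-stopping patience: training halts once _maxOutpuLearningIterationsWithoutChange consecutive iterations each improve the error by less than _minimalErrorStep. A self-contained sketch of the criterion, where a decaying value stands in for a real training epoch:

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
    const std::size_t maxIterations = 1000;  // _maxOutpuLearningIterations
    const std::size_t patience = 5;          // ..IterationsWithoutChange
    const float minStep = 0.00005f;          // _minimalErrorStep

    float error = 1.0f, lastError;
    std::size_t iteration = 0, flatIterations = 0;
    do {
        lastError = error;
        error *= 0.999f;  // stand-in for one training epoch
        // Count epochs whose improvement falls below the threshold;
        // any real progress resets the counter.
        flatIterations = (std::fabs(lastError - error) < minStep)
                             ? flatIterations + 1 : 0;
    } while(iteration++ < maxIterations && flatIterations < patience);
    std::printf("stopped after %zu iterations, error %f\n", iteration, error);
    return 0;
}
```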
@@ -54,6 +52,100 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
     return error;
 }
 
+float CascadeCorrelation::trainOutputsRandom(std::size_t step, Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
+    std::size_t outputs = patterns[0].second.size();
+
+    // Owning pointers, so the trial networks are freed when the vector goes out of scope.
+    std::vector<std::unique_ptr<FeedForward::Network>> possibleOutputs;
+    { // the first network is special: it starts from the current output weights
+        possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
+        FeedForward::Network &p = *possibleOutputs.back();
+        p.appendLayer(outputs);
+
+        for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+            p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
+            p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
+        }
+    }
+
+    // Shrink the number of random restarts as training progresses, resetting every 15 steps.
+    std::size_t generatedNets = 0;
+    if(step == 0 || step % 15 == 0) {
+        generatedNets = _maxRandomOutputWeights;
+    } else {
+        generatedNets = _maxRandomOutputWeights / step;
+    }
+
+    // The remaining trial networks start from randomly drawn output weights.
+    for(std::size_t net = 0; net < generatedNets; net++) {
+        possibleOutputs.emplace_back(new FeedForward::Network(network.getNeuronSize() - outputs - 1));
+        FeedForward::Network &p = *possibleOutputs.back();
+        p.appendLayer(outputs);
+        for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+            for(std::size_t weight = 0; weight < network.getNeuronSize() - outputs - 1; weight++) {
+                p[1][neuron + 1].weight(weight) = _distribution(_generator);
+            }
+            p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
+        }
+    }
+
+    std::vector<TrainingPattern> patternsForOutput;
+
+    for(auto &pattern : patterns) {
+        patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
+    }
+
+    std::size_t bestNetwork = 0;
+    float bestScore = std::numeric_limits<float>::max();
+    std::size_t index = 0;
+
+    for(auto &net : possibleOutputs) {
+        auto &p = *net;
+        Learning::BackPropagation learner(p);
+
+        float lastError;
+        float error = std::numeric_limits<float>::max();
+        std::size_t iteration = 0;
+        std::size_t iterWithoutImporvement = 0;
+        do {
+            lastError = error;
+            for(auto &pattern : patternsForOutput) {
+                learner.teach({pattern.first.begin() + 1, pattern.first.end()}, pattern.second);
+            }
+
+            error = 0;
+            for(auto &pattern : patternsForOutput) {
+                std::vector<float> output = p.computeOutput({pattern.first.begin() + 1, pattern.first.end()});
+                for(std::size_t outputIndex = 0; outputIndex < output.size(); outputIndex++) {
+                    error += pow(output[outputIndex] - pattern.second[outputIndex], 2);
+                }
+            }
+
+            if(fabs(lastError - error) < _minimalErrorStep) {
+                iterWithoutImporvement++;
+            } else {
+                iterWithoutImporvement = 0;
+            }
+        }
+        while(iteration++ < _maxOutpuLearningIterations && iterWithoutImporvement < _maxOutpuLearningIterationsWithoutChange);
+
+        if(error < bestScore) {
+            bestScore = error;
+            bestNetwork = index;
+        }
+        index++;
+    }
+
+    FeedForward::Network &p = *possibleOutputs[bestNetwork];
+
+    std::cout << "network: " << bestNetwork << ", error: " << bestScore << "\n";
+
+    for(std::size_t neuron = 0; neuron < outputs; neuron++) {
+        network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
+    }
+    return bestScore;
+}
+
 std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network,
                                                                                                           std::vector<std::shared_ptr<Neuron>> &candidates,
                                                                                                           const std::vector<TrainingPattern> &patterns) {
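trainOutputsRandom is a random-restart search over output weights: one trial network keeps the current weights, the rest are seeded from _distribution, each is trained briefly with backpropagation, and the lowest-error network wins. The same strategy on a toy one-dimensional objective; the quadratic stands in for the output-layer training error:

```cpp
#include <cstdio>
#include <random>

// Toy objective standing in for output-layer training: a few gradient
// steps on (w - 3)^2, returning the final error.
static float trainAndScore(float w) {
    for(int i = 0; i < 100; ++i) w -= 0.1f * 2.f * (w - 3.f);
    return (w - 3.f) * (w - 3.f);
}

int main() {
    std::mt19937 gen(42);
    std::uniform_real_distribution<float> dist(-10.f, 10.f);

    // The "special" first candidate restarts from the current weights.
    float currentWeight = 0.5f;
    float bestScore = trainAndScore(currentWeight);
    float bestStart = currentWeight;

    // The remaining candidates restart from random weights; keep the best.
    for(int k = 0; k < 20; ++k) {
        float start = dist(gen);
        float score = trainAndScore(start);
        if(score < bestScore) { bestScore = score; bestStart = start; }
    }
    std::printf("best start %f -> error %f\n", bestStart, bestScore);
    return 0;
}
```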
@@ -75,10 +167,13 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
 
             std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
             for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
-                float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
+                //float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
+                auto neuron = network.getOutputNeurons()[outputIndex];
+                // Signed, slope-weighted residual f'(net) * (o - t) instead of the squared error.
+                float error = neuron->getActivationFunction().derivatedOutput(neuron->value(), neuron->output()) * (output[outputIndex] - pattern.second[outputIndex]);
 
                 errors[patternNumber][outputIndex] = error;
                 meanErrors[outputIndex] += error;
-                sumSquareError += error;
+                sumSquareError += error * error;
             }
         }
 
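The residual stored in errors is now the signed, slope-weighted delta f'(net) * (o - t) rather than the squared error. This matches the E_{p,o} term of the candidate correlation, which needs a sign to correlate against, while sumSquareError now accumulates its square for normalizing the candidate gradient. For a sigmoid unit the pieces look like this; the values are illustrative:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // For a sigmoid, f'(net) can be written via the output as out*(1-out).
    float net = 0.3f;
    float out = 1.f / (1.f + std::exp(-net));
    float target = 1.0f;

    // Residual used for the candidate correlation: a signed, slope-weighted
    // error rather than the (always positive) squared error.
    float delta = out * (1.f - out) * (out - target);
    float sumSquare = delta * delta;  // accumulated across patterns/outputs
    std::printf("delta %f, squared %f\n", delta, sumSquare);
    return 0;
}
```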
@@ -102,19 +197,27 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
     std::vector<float> correlations(errors[0].size());
     std::vector<float> correlationSigns(errors[0].size());
 
+    float activationSum = 0.0;
     for(auto &pattern:patternsForOutput) {
         activations.push_back((*candidate)(pattern.first));
+        activationSum += activations.back();
     }
 
+    // Mean candidate activation over all patterns.
+    activationSum /= patternsForOutput.size();
+
     for(std::size_t err = 0; err < meanErrors.size(); err++) {
         for(std::size_t activ = 0; activ < activations.size(); activ++) {
-            correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
+            correlations[err] += (activations[activ] * errors[activ][err] - activationSum * meanErrors[err]);
         }
         correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
     }
 
     correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); });
 
+    // Degenerate case: a NaN correlation disqualifies this candidate.
+    if(std::isnan(correlation)) {
+        correlation = -5000;
+    }
+
     std::vector<float> derivatives(candidate->getWeights().size());
     for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
         float dcdw = 0.0;
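The quantity being maximized is Fahlman's candidate score S = sum_o | sum_p (V_p - mean(V)) * (E_{p,o} - mean(E_o)) |, where V_p is the candidate's activation on pattern p and E_{p,o} the residual error on output o. The rewritten inner sum uses the expansion sum_p (V_p * E_{p,o} - mean(V) * mean(E_o)), which is algebraically equal once summed over all patterns (assuming meanErrors holds the per-output mean). A small numeric check of that identity:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Candidate activations V_p and residual errors E_p over patterns p.
    std::vector<float> V = {0.2f, 0.9f, 0.4f};
    std::vector<float> E = {-0.1f, 0.5f, 0.2f};
    const float n = static_cast<float>(V.size());

    float vMean = 0.f, eMean = 0.f;
    for(std::size_t p = 0; p < V.size(); ++p) { vMean += V[p]; eMean += E[p]; }
    vMean /= n; eMean /= n;

    // Centered form: sum_p (V_p - mean(V)) * (E_p - mean(E))
    float centered = 0.f;
    // Expanded form used by the commit: sum_p (V_p*E_p - mean(V)*mean(E))
    float expanded = 0.f;
    for(std::size_t p = 0; p < V.size(); ++p) {
        centered += (V[p] - vMean) * (E[p] - eMean);
        expanded += V[p] * E[p] - vMean * eMean;
    }
    std::printf("centered %f, expanded %f\n", centered, expanded);  // equal
    return 0;
}
```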
@@ -123,11 +226,18 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
             float thetaO = 0.0;
             for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
                 (*candidate)(patternsForOutput[err].first);
-                float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output())/sumSquareError;
+                float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output()) / sumSquareError;
+                if(std::isnan(derivative)) {
+                    // sumSquareError can be zero when the outputs already fit the data.
+                    derivative = 1;
+                }
                 thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * patternsForOutput[err].first[input];
             }
             dcdw += thetaO;
         }
+        if(std::isnan(dcdw)) {
+            dcdw = 0.1;
+        }
         derivatives[input] = dcdw;
     }
 
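The loop above implements the correlation gradient dS/dw_i = sum_o sigma_o * sum_p (E_{p,o} - mean(E_o)) * f'(net_p) * I_{i,p}, where sigma_o is the sign of output o's correlation, f'(net_p) the candidate's activation slope on pattern p, and I_{i,p} the i-th input; the derivative is normalized by sumSquareError, which is why a zero value would produce the NaNs the new guards catch. One term of that sum, computed directly with illustrative numbers:

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Per-pattern quantities for one candidate weight w_i and one output o:
    std::vector<float> residual = {-0.1f, 0.5f, 0.2f};   // E_{p,o} - mean(E_o)
    std::vector<float> fprime   = {0.25f, 0.19f, 0.23f}; // f'(net_p)
    std::vector<float> input    = {0.0f, 1.0f, 0.5f};    // I_{i,p}
    float sign = 1.f;              // sigma_o: sign of the correlation with o
    float sumSquareError = 0.3f;   // normalizer; illustrative value

    // dS/dw_i = sigma_o * sum_p residual_p * (f'(net_p)/sumSquareError) * I_{i,p}
    float dcdw = 0.f;
    for(std::size_t p = 0; p < residual.size(); ++p)
        dcdw += sign * residual[p] * (fprime[p] / sumSquareError) * input[p];
    if(std::isnan(dcdw)) dcdw = 0.1f;  // small fallback, mirroring the commit's guard
    std::printf("dS/dw_i = %f\n", dcdw);
    return 0;
}
```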
@@ -148,8 +258,8 @@ std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCor
             }
 
         }
-        while(iterations++ < 200 && iterationsWithoutIprovement < 300);
-        std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
+        while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
+        //std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
 
         return {bestCandidate, bestCorrelations};
     }