Cascade correlation: stick to Fahlman's implementation rather than the paper
This commit is contained in:
@@ -3,7 +3,6 @@
|
|||||||
#include "../Cascade/Network.h"
|
#include "../Cascade/Network.h"
|
||||||
#include "../FeedForward/Network.h"
|
#include "../FeedForward/Network.h"
|
||||||
#include "../Learning/QuickPropagation.h"
|
#include "../Learning/QuickPropagation.h"
|
||||||
#include "../ActivationFunction/Tangents.h"
|
|
||||||
|
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@@ -22,16 +21,16 @@ namespace NeuralNetwork {
|
|||||||
std::size_t inputs = patterns[0].first.size();
|
std::size_t inputs = patterns[0].first.size();
|
||||||
std::size_t outputs = patterns[0].second.size();
|
std::size_t outputs = patterns[0].second.size();
|
||||||
|
|
||||||
Cascade::Network network(inputs, outputs, NeuralNetwork::ActivationFunction::Tangents());
|
Cascade::Network network(inputs, outputs, *_activFunction.get());
|
||||||
|
|
||||||
network.randomizeWeights();
|
network.randomizeWeights();
|
||||||
|
|
||||||
int step = 0;
|
std::size_t step = 0;
|
||||||
float error = trainOutputs(network, patterns);
|
float error = trainOutputs(network, patterns);
|
||||||
while(step++ < 15 && error > _maxError) {
|
while(step++ < _maxHiddenUnits && error > _maxError) {
|
||||||
std::vector<std::shared_ptr<Neuron>> candidates = createCandidates(network.getNeuronSize() - outputs);
|
std::vector<std::shared_ptr<Neuron>> candidates = createCandidates(network.getNeuronSize() - outputs);
|
||||||
|
|
||||||
std::shared_ptr<Neuron> candidate=trainCandidates(network, candidates, patterns);
|
std::pair<std::shared_ptr<Neuron>, std::vector<float>> candidate = trainCandidates(network, candidates, patterns);
|
||||||
addBestCandidate(network, candidate);
|
addBestCandidate(network, candidate);
|
||||||
|
|
||||||
error = trainOutputs(network, patterns);
|
error = trainOutputs(network, patterns);
|
||||||
@@ -59,10 +58,20 @@ namespace NeuralNetwork {
|
|||||||
_distribution = std::uniform_real_distribution<>(-weightRange, weightRange);
|
_distribution = std::uniform_real_distribution<>(-weightRange, weightRange);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sets the upper bound on construction steps.
// The value is stored in _maxHiddenUnits, which the construction loop compares
// against its step counter; every step installs one candidate via
// addBestCandidate(). The in-class default is 20.
void setMaximumHiddenNeurons(std::size_t neurons) {
|
||||||
|
_maxHiddenUnits = neurons;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replaces the activation function handed to the Cascade::Network constructor
// and assigned to every freshly created candidate neuron.
// The argument itself is not retained: function.clone() is called and the
// result is wrapped in a std::shared_ptr stored in _activFunction.
// NOTE(review): presumably clone() returns a heap-allocated raw pointer whose
// ownership passes to the shared_ptr — confirm against ActivationFunction.
void setActivationFunction(const ActivationFunction::ActivationFunction &function) {
|
||||||
|
_activFunction = std::shared_ptr<ActivationFunction::ActivationFunction>(function.clone());
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
std::shared_ptr<ActivationFunction::ActivationFunction> _activFunction = std::make_shared<ActivationFunction::Sigmoid>(-4.9);
|
||||||
float _minimalErrorStep = 0.00005;
|
float _minimalErrorStep = 0.00005;
|
||||||
float _maxError;
|
float _maxError;
|
||||||
float _weightRange;
|
float _weightRange;
|
||||||
|
std::size_t _maxHiddenUnits = 20;
|
||||||
std::size_t _numberOfCandidates;
|
std::size_t _numberOfCandidates;
|
||||||
std::mt19937 _generator;
|
std::mt19937 _generator;
|
||||||
std::uniform_real_distribution<> _distribution;
|
std::uniform_real_distribution<> _distribution;
|
||||||
@@ -84,19 +93,23 @@ namespace NeuralNetwork {
|
|||||||
|
|
||||||
float trainOutputs(Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
|
float trainOutputs(Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
|
||||||
|
|
||||||
// Trains the candidate neurons against `patterns` and reports the winner.
// NOTE(review): this span looks like a side-by-side diff — the first
// declaration is the pre-change signature, the second (split over two rows) the
// post-change one.
// Pre-change: returns only the best candidate neuron.
// Post-change: returns {best candidate, bestCorrelations}; the vector is sized
// to the number of network outputs and is later indexed per output neuron by
// addBestCandidate(). Presumably it holds the winner's per-output correlations
// — confirm against the definition.
std::shared_ptr<Neuron> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates, const std::vector<TrainingPattern> &patterns);
|
std::pair<std::shared_ptr<Neuron>, std::vector<float>> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates,
|
||||||
|
const std::vector<TrainingPattern> &patterns);
|
||||||
|
|
||||||
// Installs a trained candidate into `network` as a new neuron and adjusts the
// output neurons' weights.
// NOTE(review): this span looks like a side-by-side diff — each statement is
// shown as the pre-change line, a `|` row, then the post-change line.
// Both versions: add a neuron with network.addNeuron(), copy the candidate's
// weights and activation function onto it, then for every output neuron scale
// all weights by 0.7, overwrite the last weight and store the vector back.
// Pre-change: `candidate` is the neuron itself; the last weight is drawn from
// _distribution(_generator).
// Post-change: `candidate` is {neuron, per-output values}; the last weight is
// -candidate.second[outIndex] * weightPortion.
void addBestCandidate(Cascade::Network &network, const std::shared_ptr<Neuron> &candidate) {
|
void addBestCandidate(Cascade::Network &network, const std::pair<std::shared_ptr<Neuron>, std::vector<float>> &candidate) {
|
||||||
auto neuron = network.addNeuron();
|
auto neuron = network.addNeuron();
|
||||||
|
|
||||||
neuron->setWeights(candidate->getWeights());
|
// Neuron count minus output count, evaluated after addNeuron() above.
float weightPortion = network.getNeuronSize() - network.outputs();
|
||||||
neuron->setActivationFunction(candidate->getActivationFunction());
|
neuron->setWeights(candidate.first->getWeights());
|
||||||
|
neuron->setActivationFunction(candidate.first->getActivationFunction());
|
||||||
|
// Position of the current output neuron; used to index candidate.second.
// NOTE(review): assumes candidate.second has one entry per output neuron — confirm.
std::size_t outIndex = 0;
|
||||||
for(auto &n :network.getOutputNeurons()) {
|
for(auto &n :network.getOutputNeurons()) {
|
||||||
// Work on a copy of the weights; it is written back with setWeights() below.
auto weights = n->getWeights();
|
auto weights = n->getWeights();
|
||||||
for(auto& weight: weights) {
|
for(auto &weight: weights) {
|
||||||
weight *=0.7;
|
weight *= 0.7;
|
||||||
}
|
}
|
||||||
// NOTE(review): the last weight is presumably the connection from the neuron
// just added — confirm against Cascade::Network::addNeuron().
weights[weights.size()-1] = _distribution(_generator);
|
weights[weights.size() - 1] = -candidate.second[outIndex] * weightPortion;//_distribution(_generator);
|
||||||
|
outIndex++;
|
||||||
n->setWeights(weights);
|
n->setWeights(weights);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -107,7 +120,7 @@ namespace NeuralNetwork {
|
|||||||
for(std::size_t i = 0; i < _numberOfCandidates; i++) {
|
for(std::size_t i = 0; i < _numberOfCandidates; i++) {
|
||||||
candidates.push_back(std::make_shared<Neuron>(id));
|
candidates.push_back(std::make_shared<Neuron>(id));
|
||||||
candidates.back()->setInputSize(id);
|
candidates.back()->setInputSize(id);
|
||||||
candidates.back()->setActivationFunction(NeuralNetwork::ActivationFunction::Tangents());
|
candidates.back()->setActivationFunction(*_activFunction.get());
|
||||||
|
|
||||||
for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
|
for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
|
||||||
candidates.back()->weight(weightIndex) = _distribution(_generator);
|
candidates.back()->weight(weightIndex) = _distribution(_generator);
|
||||||
|
|||||||
@@ -5,55 +5,58 @@ using namespace NeuralNetwork::ConstructiveAlgorihtms;
|
|||||||
// Trains the weights of the output neurons of `network` and returns the summed
// squared error measured in the last training iteration.
// NOTE(review): this span looks like a side-by-side diff — each statement is
// shown as the pre-change line, a `|` row, then the post-change line.
// Outline (both versions):
//  1. Build a temporary FeedForward::Network `p` with one appended layer of
//     `outputs` neurons and seed it from the last `outputs` neurons of `network`.
//  2. Replace each pattern's input with getInnerNeuronsOutput(network, input).
//  3. Repeatedly teach `p` with Learning::QuickPropagation and recompute the error.
//  4. Copy the trained weights back into `network`.
// Reads patterns[0] unconditionally, so `patterns` must not be empty.
// Writes one progress line to std::cout after training.
float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
|
float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
|
||||||
// Number of outputs is taken from the target vector of the first pattern.
std::size_t outputs = patterns[0].second.size();
|
std::size_t outputs = patterns[0].second.size();
|
||||||
|
|
||||||
// Constructor argument is all neurons except the outputs, minus one more.
// NOTE(review): the extra -1 presumably excludes a bias neuron — confirm.
FeedForward::Network p(network.getNeuronSize() - outputs-1);
|
FeedForward::Network p(network.getNeuronSize() - outputs - 1);
|
||||||
p.appendLayer(outputs);
|
p.appendLayer(outputs);
|
||||||
Learning::QuickPropagation learner(p);
|
Learning::QuickPropagation learner(p);
|
||||||
|
|
||||||
// Seed p's appended layer from the last `outputs` neurons of `network`.
// NOTE(review): the +1 offset presumably skips a bias neuron at index 0 — confirm.
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
||||||
p[1][neuron+1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
|
p[1][neuron + 1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
|
||||||
p[1][neuron+1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
|
p[1][neuron + 1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
|
||||||
}
|
}
|
||||||
|
|
||||||
//std::cout << p.stringify() << "\n";
|
//std::cout << p.stringify() << "\n";
|
||||||
// Training set for p: inputs become the result of getInnerNeuronsOutput(),
// targets are kept unchanged.
std::vector<TrainingPattern> patternsForOutput;
|
std::vector<TrainingPattern> patternsForOutput;
|
||||||
|
|
||||||
for(auto &pattern:patterns) {
|
for(auto &pattern:patterns) {
|
||||||
patternsForOutput.emplace_back(getInnerNeuronsOutput(network,pattern.first), pattern.second);
|
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Post-change drops the initializer; lastError is assigned at the top of the
// do-loop before it is read.
float lastError = std::numeric_limits<float>::max();
|
float lastError;
|
||||||
float error = std::numeric_limits<float>::max();
|
float error = std::numeric_limits<float>::max();
|
||||||
std::size_t iteration = 0;
|
std::size_t iteration = 0;
|
||||||
|
||||||
// Counts consecutive iterations whose error changed by less than _minimalErrorStep.
std::size_t iterWithoutImporvement=0;
|
std::size_t iterWithoutImporvement = 0;
|
||||||
do {
|
do {
|
||||||
lastError=error;
|
lastError = error;
|
||||||
for(auto &pattern:patternsForOutput) {
|
for(auto &pattern:patternsForOutput) {
|
||||||
// The first element of the input vector is dropped before teaching.
// NOTE(review): presumably that element is the bias value — confirm.
learner.teach({pattern.first.begin()+1,pattern.first.end()}, pattern.second);
|
learner.teach({pattern.first.begin() + 1, pattern.first.end()}, pattern.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error = sum over all patterns and outputs of (output - target)^2.
error = 0;
|
error = 0;
|
||||||
for(auto &pattern:patternsForOutput) {
|
for(auto &pattern:patternsForOutput) {
|
||||||
// Pre-change passes the whole input vector; post-change drops the first
// element, matching what teach() receives above.
std::vector<float> output = p.computeOutput(pattern.first);
|
std::vector<float> output = p.computeOutput({pattern.first.begin() + 1, pattern.first.end()});
|
||||||
for(std::size_t outputIndex = 0; outputIndex < output.size(); outputIndex++) {
|
for(std::size_t outputIndex = 0; outputIndex < output.size(); outputIndex++) {
|
||||||
error += pow(output[outputIndex] - pattern.second[outputIndex],2);
|
error += pow(output[outputIndex] - pattern.second[outputIndex], 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(fabs(lastError - error) < _minimalErrorStep) {
|
if(fabs(lastError - error) < _minimalErrorStep) {
|
||||||
iterWithoutImporvement++;
|
iterWithoutImporvement++;
|
||||||
}else {
|
} else {
|
||||||
iterWithoutImporvement=0;
|
iterWithoutImporvement = 0;
|
||||||
}
|
}
|
||||||
// Stop once the iteration counter passes 1000 or the stall counter reaches its
// limit: 3 in the pre-change line, 400 in the post-change line.
} while (iteration++ < 1000 && iterWithoutImporvement < 3);
|
}
|
||||||
std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError-error) << "\n";
|
while(iteration++ < 1000 && iterWithoutImporvement < 400);
|
||||||
|
std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError - error) << "\n";
|
||||||
|
|
||||||
// Write the trained weights back to the last `outputs` neurons of `network`.
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
||||||
network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron+1].getWeights());
|
network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron + 1].getWeights());
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates, const std::vector<TrainingPattern> &patterns) {
|
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> CascadeCorrelation::trainCandidates(Cascade::Network &network,
|
||||||
|
std::vector<std::shared_ptr<Neuron>> &candidates,
|
||||||
|
const std::vector<TrainingPattern> &patterns) {
|
||||||
std::size_t outputs = patterns[0].second.size();
|
std::size_t outputs = patterns[0].second.size();
|
||||||
|
|
||||||
std::vector<TrainingPattern> patternsForOutput;
|
std::vector<TrainingPattern> patternsForOutput;
|
||||||
@@ -62,31 +65,34 @@ std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Casca
|
|||||||
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
|
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::vector<float>> errors (patterns.size());
|
std::vector<std::vector<float>> errors(patterns.size());
|
||||||
std::vector<float> meanErrors (outputs);
|
std::vector<float> meanErrors(outputs);
|
||||||
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
|
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
|
||||||
auto &pattern=patterns[patternNumber];
|
auto &pattern = patterns[patternNumber];
|
||||||
errors[patternNumber].resize(network.outputs());
|
errors[patternNumber].resize(network.outputs());
|
||||||
|
|
||||||
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
|
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
|
||||||
for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
|
for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
|
||||||
float error = pow(pattern.second[outputIndex] - output[outputIndex],2);
|
float error = pow(pattern.second[outputIndex] - output[outputIndex], 2);
|
||||||
errors[patternNumber][outputIndex]=error;
|
errors[patternNumber][outputIndex] = error;
|
||||||
meanErrors[outputIndex] += error;
|
meanErrors[outputIndex] += error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n){ n/=patterns.size(); });
|
std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n) { n /= patterns.size(); });
|
||||||
std::size_t iterations=0;
|
|
||||||
std::size_t iterationsWithoutIprovement=0;
|
std::size_t iterations = 0;
|
||||||
float bestCorrelation=0;
|
std::size_t iterationsWithoutIprovement = 0;
|
||||||
float lastCorrelation=0;
|
float bestCorrelation = 0;
|
||||||
std::shared_ptr<Neuron> bestCandidate=nullptr;
|
float lastCorrelation = 0;
|
||||||
|
std::shared_ptr<Neuron> bestCandidate = nullptr;
|
||||||
|
|
||||||
|
std::vector<float> bestCorrelations(errors[0].size());
|
||||||
|
|
||||||
do {
|
do {
|
||||||
lastCorrelation = bestCorrelation;
|
lastCorrelation = bestCorrelation;
|
||||||
bool firstStep=true;
|
bool firstStep = true;
|
||||||
for(auto&candidate : candidates) {
|
for(auto &candidate : candidates) {
|
||||||
float correlation;
|
float correlation;
|
||||||
|
|
||||||
std::vector<float> activations;
|
std::vector<float> activations;
|
||||||
@@ -104,7 +110,8 @@ std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Casca
|
|||||||
correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
|
correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0);
|
correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0, [](const float &a, float b) { return a + fabs(b); });
|
||||||
|
|
||||||
std::vector<float> derivatives(candidate->getWeights().size());
|
std::vector<float> derivatives(candidate->getWeights().size());
|
||||||
for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
|
for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
|
||||||
float dcdw = 0.0;
|
float dcdw = 0.0;
|
||||||
@@ -122,13 +129,14 @@ std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Casca
|
|||||||
}
|
}
|
||||||
|
|
||||||
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
|
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
|
||||||
candidate->weight(weightIndex) += derivatives[weightIndex] * 0.1;
|
candidate->weight(weightIndex) += derivatives[weightIndex] * 0.7;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(firstStep || correlation > bestCorrelation) {
|
if(firstStep || correlation > bestCorrelation) {
|
||||||
bestCorrelation = correlation;
|
bestCorrelation = correlation;
|
||||||
bestCandidate=candidate;
|
bestCandidate = candidate;
|
||||||
firstStep=false;
|
std::swap(bestCorrelations, correlations);
|
||||||
|
firstStep = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -136,8 +144,10 @@ std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Casca
|
|||||||
iterationsWithoutIprovement++;
|
iterationsWithoutIprovement++;
|
||||||
}
|
}
|
||||||
|
|
||||||
} while (iterations ++ < 200 && iterationsWithoutIprovement <3);
|
}
|
||||||
|
while(iterations++ < 200 && iterationsWithoutIprovement < 300);
|
||||||
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
|
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
|
||||||
|
|
||||||
return bestCandidate;
|
return {bestCandidate, bestCorrelations};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user