cascade correlation: refactoring
This commit is contained in:
@@ -13,8 +13,8 @@ namespace NeuralNetwork {
|
||||
class CascadeCorrelation {
|
||||
typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
|
||||
public:
|
||||
CascadeCorrelation(std::size_t numberOfCandidate = 2, float maxError = 0.7) :
|
||||
_maxError(maxError), _weightRange(1), _numberOfCandidates(numberOfCandidate), _generator(rand()), _distribution() {
|
||||
CascadeCorrelation(std::size_t numberOfCandidate = 20, float maxError = 0.7) :
|
||||
_maxError(maxError), _weightRange(0.3), _numberOfCandidates(numberOfCandidate), _generator(rand()), _distribution() {
|
||||
setWeightRange(_weightRange);
|
||||
}
|
||||
|
||||
@@ -28,10 +28,10 @@ namespace NeuralNetwork {
|
||||
|
||||
int step = 0;
|
||||
float error = trainOutputs(network, patterns);
|
||||
while(step++ < 20 && error > _maxError) {
|
||||
std::shared_ptr<Neuron> candidate = createCandidate(network.getNeuronSize() - outputs);
|
||||
while(step++ < 15 && error > _maxError) {
|
||||
std::vector<std::shared_ptr<Neuron>> candidates = createCandidates(network.getNeuronSize() - outputs);
|
||||
|
||||
trainCandidates(network, candidate, patterns);
|
||||
std::shared_ptr<Neuron> candidate=trainCandidates(network, candidates, patterns);
|
||||
addBestCandidate(network, candidate);
|
||||
|
||||
error = trainOutputs(network, patterns);
|
||||
@@ -60,7 +60,7 @@ namespace NeuralNetwork {
|
||||
}
|
||||
|
||||
protected:
|
||||
float _minimalErrorStep = 0.0005;
|
||||
float _minimalErrorStep = 0.00005;
|
||||
float _maxError;
|
||||
float _weightRange;
|
||||
std::size_t _numberOfCandidates;
|
||||
@@ -84,15 +84,11 @@ namespace NeuralNetwork {
|
||||
|
||||
float trainOutputs(Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
|
||||
|
||||
void trainCandidates(Cascade::Network &network, std::shared_ptr<Neuron> &candidates, const std::vector<TrainingPattern> &patterns);
|
||||
std::shared_ptr<Neuron> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates, const std::vector<TrainingPattern> &patterns);
|
||||
|
||||
void addBestCandidate(Cascade::Network &network, const std::shared_ptr<Neuron> &candidate) {
|
||||
auto neuron = network.addNeuron();
|
||||
|
||||
//auto tmp = candidate->getWeights();
|
||||
//std::fill(tmp.begin(),tmp.end(),0.2);
|
||||
//neuron->setWeights(tmp);
|
||||
|
||||
neuron->setWeights(candidate->getWeights());
|
||||
neuron->setActivationFunction(candidate->getActivationFunction());
|
||||
for(auto &n :network.getOutputNeurons()) {
|
||||
@@ -102,12 +98,10 @@ namespace NeuralNetwork {
|
||||
}
|
||||
weights[weights.size()-1] = _distribution(_generator);
|
||||
n->setWeights(weights);
|
||||
//n->weight(n->getWeights().size() - 1) = _distribution(_generator);
|
||||
//n->weight(n->getWeights().size() - 1) = 0.2;//.distribution(_generator);
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Neuron> createCandidate(std::size_t id) { //TODO
|
||||
std::vector<std::shared_ptr<Neuron>> createCandidates(std::size_t id) {
|
||||
std::vector<std::shared_ptr<Neuron>> candidates;
|
||||
|
||||
for(std::size_t i = 0; i < _numberOfCandidates; i++) {
|
||||
@@ -117,10 +111,9 @@ namespace NeuralNetwork {
|
||||
|
||||
for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
|
||||
candidates.back()->weight(weightIndex) = _distribution(_generator);
|
||||
// candidates.back()->weight(weightIndex) = 0.1;//_distribution(_generator);
|
||||
}
|
||||
}
|
||||
return candidates[0];
|
||||
return candidates;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -8,14 +8,13 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
|
||||
FeedForward::Network p(network.getNeuronSize() - outputs-1);
|
||||
p.appendLayer(outputs);
|
||||
Learning::QuickPropagation learner(p);
|
||||
learner.setLearningCoefficient(0.9);
|
||||
|
||||
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
||||
p[1][neuron+1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
|
||||
p[1][neuron+1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
|
||||
}
|
||||
|
||||
std::cout << p.stringify() << "\n";
|
||||
//std::cout << p.stringify() << "\n";
|
||||
std::vector<TrainingPattern> patternsForOutput;
|
||||
|
||||
for(auto &pattern:patterns) {
|
||||
@@ -40,12 +39,12 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
|
||||
}
|
||||
}
|
||||
|
||||
if(lastError - error < _minimalErrorStep) {
|
||||
if(fabs(lastError - error) < _minimalErrorStep) {
|
||||
iterWithoutImporvement++;
|
||||
}else {
|
||||
iterWithoutImporvement=0;
|
||||
}
|
||||
} while (iteration++ < 500 && iterWithoutImporvement < 300);
|
||||
} while (iteration++ < 1000 && iterWithoutImporvement < 3);
|
||||
std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError-error) << "\n";
|
||||
|
||||
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
|
||||
@@ -54,7 +53,7 @@ float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vec
|
||||
return error;
|
||||
}
|
||||
|
||||
void CascadeCorrelation::trainCandidates(Cascade::Network &network, std::shared_ptr<Neuron> &candidate, const std::vector<TrainingPattern> &patterns) {
|
||||
std::shared_ptr<NeuralNetwork::Neuron> CascadeCorrelation::trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates, const std::vector<TrainingPattern> &patterns) {
|
||||
std::size_t outputs = patterns[0].second.size();
|
||||
|
||||
std::vector<TrainingPattern> patternsForOutput;
|
||||
@@ -78,56 +77,67 @@ void CascadeCorrelation::trainCandidates(Cascade::Network &network, std::shared_
|
||||
}
|
||||
|
||||
std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n){ n/=patterns.size(); });
|
||||
|
||||
std::size_t iterations=0;
|
||||
std::size_t iterationsWithoutIprovement=0;
|
||||
float lastCorrelations=0;
|
||||
float correlation=std::numeric_limits<float>::max();
|
||||
float bestCorrelation=0;
|
||||
float lastCorrelation=0;
|
||||
std::shared_ptr<Neuron> bestCandidate=nullptr;
|
||||
|
||||
do {
|
||||
lastCorrelations=correlation;
|
||||
lastCorrelation = bestCorrelation;
|
||||
bool firstStep=true;
|
||||
for(auto&candidate : candidates) {
|
||||
float correlation;
|
||||
|
||||
std::vector<float>activations;
|
||||
std::vector<float>correlations(errors[0].size());
|
||||
std::vector<float>correlationSigns(errors[0].size());
|
||||
std::vector<float> activations;
|
||||
std::vector<float> correlations(errors[0].size());
|
||||
std::vector<float> correlationSigns(errors[0].size());
|
||||
|
||||
for(auto &pattern:patternsForOutput) {
|
||||
activations.push_back((*candidate)(pattern.first));
|
||||
}
|
||||
|
||||
for(std::size_t err=0;err<meanErrors.size();err++) {
|
||||
for(std::size_t activ=0;activ<activations.size();activ++) {
|
||||
correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
|
||||
for(auto &pattern:patternsForOutput) {
|
||||
activations.push_back((*candidate)(pattern.first));
|
||||
}
|
||||
correlationSigns[err] = correlations[err] > 0? 1.0 : -1.0;
|
||||
}
|
||||
|
||||
correlation = std::accumulate(correlations.begin(), correlations.end(),0.0);
|
||||
std::vector<float> derivatives(candidate->getWeights().size());
|
||||
for (std::size_t input=0;input<candidate->getWeights().size();input++) {
|
||||
float dcdw = 0.0;
|
||||
|
||||
for(std::size_t err=0;err<errors.size();err++) {
|
||||
float thetaO = 0.0;
|
||||
for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
|
||||
(*candidate)(patternsForOutput[err].first);
|
||||
float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
|
||||
thetaO+=correlationSigns[meanError] * (errors[err][meanError] - meanErrors [meanError]) * derivative * candidate->weight(input);
|
||||
for(std::size_t err = 0; err < meanErrors.size(); err++) {
|
||||
for(std::size_t activ = 0; activ < activations.size(); activ++) {
|
||||
correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
|
||||
}
|
||||
dcdw += thetaO;
|
||||
correlationSigns[err] = correlations[err] > 0 ? 1.0 : -1.0;
|
||||
}
|
||||
|
||||
correlation = std::accumulate(correlations.begin(), correlations.end(), 0.0);
|
||||
std::vector<float> derivatives(candidate->getWeights().size());
|
||||
for(std::size_t input = 0; input < candidate->getWeights().size(); input++) {
|
||||
float dcdw = 0.0;
|
||||
|
||||
for(std::size_t err = 0; err < errors.size(); err++) {
|
||||
float thetaO = 0.0;
|
||||
for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
|
||||
(*candidate)(patternsForOutput[err].first);
|
||||
float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
|
||||
thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * candidate->weight(input);
|
||||
}
|
||||
dcdw += thetaO;
|
||||
}
|
||||
derivatives[input] = dcdw;
|
||||
}
|
||||
|
||||
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
|
||||
candidate->weight(weightIndex) += derivatives[weightIndex] * 0.1;
|
||||
}
|
||||
|
||||
if(firstStep || correlation > bestCorrelation) {
|
||||
bestCorrelation = correlation;
|
||||
bestCandidate=candidate;
|
||||
firstStep=false;
|
||||
}
|
||||
derivatives[input]=dcdw;
|
||||
}
|
||||
|
||||
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
|
||||
candidate->weight(weightIndex) += derivatives[weightIndex]*0.1;
|
||||
}
|
||||
|
||||
if(correlation+0.0001 <= lastCorrelations) {
|
||||
if(bestCorrelation <= lastCorrelation) {
|
||||
iterationsWithoutIprovement++;
|
||||
} else {
|
||||
iterationsWithoutIprovement=0;
|
||||
}
|
||||
|
||||
std::cout << correlation << "\n";
|
||||
} while (iterations ++ < 200 && iterationsWithoutIprovement <3);
|
||||
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
|
||||
|
||||
return bestCandidate;
|
||||
}
|
||||
Reference in New Issue
Block a user