Cascade2 algorithm implementation

This commit is contained in:
2016-05-13 20:18:43 +02:00
parent 9e2ce222fb
commit 58abfea7a2
2 changed files with 358 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
#pragma once
#include "../Cascade/Network.h"
#include "../FeedForward/Network.h"
#include "../Learning/QuickPropagation.h"
#include "CascadeCorrelation.h"
#include <random>
#include <algorithm>
// http://fann.cvs.sourceforge.net/viewvc/fann/fann/src/fann_cascade.c?view=markup
// https://github.com/gtomar/cascade
namespace NeuralNetwork {
namespace ConstructiveAlgorihtms {
class Cascade2 : public CascadeCorrelation {
public:
typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
Cascade2(std::size_t numberOfCandidates = 18, float maxError = 0.7f) : CascadeCorrelation(numberOfCandidates, maxError) {
}
protected:
virtual std::pair<std::shared_ptr<Neuron>, std::vector<float>> trainCandidates(Cascade::Network &network, std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) override;
};
}
}

View File

@@ -0,0 +1,327 @@
#include <NeuralNetwork/ConstructiveAlgorithms/Cascade2.h>
#include <cmath>    // fabs
#include <iostream> // std::cout diagnostics
using namespace NeuralNetwork::ConstructiveAlgorihtms;
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size();
std::vector<TrainingPattern> patternsForOutput;
float sumSqDiffs=0.0;
for(auto &pattern:patterns) {
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
}
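// patternsForOutput: the original inputs augmented with the activations of the
// already-installed hidden neurons, i.e. everything a new candidate unit can see
// (assuming that is what getInnerNeuronsOutput() returns, as its name suggests).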
std::vector <std::vector<float>> errors(patterns.size());
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
auto &pattern = patterns[patternNumber];
errors[patternNumber].resize(network.outputs());
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
float diff = output[outputIndex]-pattern.second[outputIndex];
errors[patternNumber][outputIndex] = diff;
sumSqDiffs+=diff*diff;
}
}
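// errors[p][o] now holds the signed residual (network output minus target) for
// pattern p and output o; sumSqDiffs is the total squared error and serves as
// the baseline score a candidate tries to reduce.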
std::size_t iterations = 0;
std::size_t iterationsWithoutImprovement = 0;
float bestCorrelation = 0;
float lastCorrelation = 0;
std::size_t bestCandidateIndex=0;
std::shared_ptr<Neuron> bestCandidate = nullptr;
std::vector<std::vector<float>> candidateWeights(candidates.size());
for(auto &w: candidateWeights) {
w.resize(outputs);
for(auto &output: w) {
output = fabs(_distribution(_generator))*0.5;
}
}
//compute Correlation Epoch
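// Cascade2 differs from plain cascade-correlation here: instead of maximising the
// covariance between a candidate's activation and the residual error, each candidate
// (together with a tentative weight per network output in candidateWeights) is trained
// by gradient descent so that weight * activation reproduces the residual error itself
// (see the FANN fann_cascade.c reference above). The score therefore starts at
// sumSqDiffs and drops by whatever squared error the candidate fails to cancel;
// the candidate with the highest score wins.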
do {
lastCorrelation = bestCorrelation;
bool firstStep = true;
std::size_t candidateIndex=0;
for(auto &candidate : candidates) {
float score=sumSqDiffs;
std::vector<float> slopes(candidate->getWeights().size());
std::vector<float> outSlopes(outputs);
std::size_t patternIndex = 0;
for(auto &pattern : patternsForOutput) {
float errSum = 0.0f;
float activationValue = (*candidate)(pattern.first);
float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
for(std::size_t output = 0; output < outputs; output++) {
float weight = candidateWeights[candidateIndex][output];
// diff: how far the candidate's contribution is from the network's residual error on this output
float diff = activationValue * weight - errors[patternIndex][output];
score -= (diff * diff);
outSlopes[output] -= 2.0f * diff * activationValue;
errSum += diff * weight;
}
errSum *= derivative;
for(std::size_t input = 0; input < pattern.first.size(); input++) {
slopes[input] -= errSum * pattern.first[input];
}
patternIndex++; // advance once per pattern, not once per output
}
for(std::size_t weightIndex = 0; weightIndex < slopes.size(); weightIndex++) {
candidate->weight(weightIndex) += slopes[weightIndex] * 0.7f / patterns.size(); // alternative scaling: patterns.size() * patterns[0].first.size()
}
for(std::size_t weightIndex = 0; weightIndex < outSlopes.size(); weightIndex++) {
candidateWeights[candidateIndex][weightIndex] += outSlopes[weightIndex] * 0.7f / patterns.size(); // alternative scaling: patterns.size() * patterns[0].first.size()
}
if(firstStep || score > bestCorrelation) {
bestCorrelation = score;
bestCandidate = candidate;
firstStep = false;
bestCandidateIndex=candidateIndex;
}
candidateIndex++;
}
if(bestCorrelation <= lastCorrelation) {
iterationsWithoutImprovement++;
}
}
while(iterations++ < _maxCandidateIterations && iterationsWithoutImprovement < _maxCandidateIterationsWithoutChange);
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
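// The winning candidate was trained so that weight * activation approximates the error
// (output minus target); flipping the sign of its output weights makes the installed
// unit subtract that error from the network's output, as Cascade2 prescribes.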
for(auto &a : candidateWeights[bestCandidateIndex]) {
a*=-1.0;
}
return {bestCandidate, candidateWeights[bestCandidateIndex]};
}
/*
*
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size();
std::vector<TrainingPattern> patternsForOutput;
float sumSqDiffs=0.0;
for(auto &pattern:patterns) {
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
}
std::vector<float> errors(patterns.size());
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
auto &pattern = patterns[patternNumber];
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
float diff = output[outputIndex]-pattern.second[outputIndex];
errors[outputIndex] += diff;
sumSqDiffs+=diff*diff;
}
}
std::size_t iterations = 0;
std::size_t iterationsWithoutIprovement = 0;
float bestCorrelation = 0;
float lastCorrelation = 0;
std::size_t bestCandidateIndex=0;
std::shared_ptr<Neuron> bestCandidate = nullptr;
std::vector<std::vector<float>> candidateWeights(candidates.size());
for(auto &w: candidateWeights) {
w.resize(outputs);
for(auto &output: w) {
output = fabs(_distribution(_generator));
}
}
//compute Correlation Epoch
do {
lastCorrelation = bestCorrelation;
bool firstStep = true;
std::size_t candidateIndex=0;
for(auto &candidate : candidates) {
float score=sumSqDiffs;
std::vector<float> slopes(candidate->getWeights().size());
std::vector<float> outSlopes(outputs);
for(auto &pattern:patternsForOutput) {
float errSum = 0.0;
float activationValue =(*candidate)(pattern.first);
float derivatived = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
for(std::size_t output = 0; output < outputs; output++) {
float weight = candidateWeights[candidateIndex][output];
float diff = activationValue * weight - errors[output];
float goalDir= pattern.second[output] <0.0? -1.0 :1.0;
float diffDir= diff >0.0? -1.0 :1.0;
score -= (diff * diff);
outSlopes[output] += diff * activationValue;
errSum += diff * weight;
}
errSum*= derivatived;
for(std::size_t input = 0; input < pattern.first.size(); input++) {
slopes[input] += errSum*pattern.first[input];
}
}
for(std::size_t weightIndex = 0; weightIndex < slopes.size(); weightIndex++) {
candidate->weight(weightIndex) += slopes[weightIndex] * 0.7/ (patterns.size() * patterns[0].first.size());
}
for(std::size_t weightIndex = 0; weightIndex < outSlopes.size(); weightIndex++) {
candidateWeights[candidateIndex][weightIndex] += outSlopes[weightIndex] * 0.7/ (patterns.size() * patterns[0].first.size());
}
if(firstStep || score > bestCorrelation) {
bestCorrelation = score;
bestCandidate = candidate;
firstStep = false;
bestCandidateIndex=candidateIndex;
}
candidateIndex++;
}
if(bestCorrelation <= lastCorrelation) {
iterationsWithoutIprovement++;
}
}
while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
for(auto &a : candidateWeights[bestCandidateIndex]) {
a*=-1.0;
}
return {bestCandidate, candidateWeights[bestCandidateIndex]};
}
*/
/*
std::pair<std::shared_ptr<NeuralNetwork::Neuron>, std::vector<float>> Cascade2::trainCandidates(Cascade::Network &network,
std::vector<std::shared_ptr<Neuron>> &candidates,
const std::vector<TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size();
std::vector<TrainingPattern> patternsForOutput;
std::vector<FeedForward::Network*> patternNets;
for(auto &pattern:patterns) {
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
}
std::vector<float> errors(patterns.size());
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
auto &pattern = patterns[patternNumber];
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
patternNets.push_back(new FeedForward::Network(patternsForOutput[patternNumber].first.size()-1));
auto patternNetwork = patternNets.back();
auto &hidden = patternNetwork->appendLayer(2);
auto &outputLayer = patternNetwork->appendLayer(outputs);
for(std::size_t outputIndex = 0; outputIndex < outputs; outputIndex++) {
outputLayer[outputIndex+1].weight(0) = network.getOutputNeurons()[outputIndex]->value();
float diff = pattern.second[outputIndex] - output[outputIndex];
errors[outputIndex] += diff;
}
}
std::size_t iterations = 0;
std::size_t iterationsWithoutIprovement = 0;
float bestCorrelation = 0;
float lastCorrelation = 0;
std::size_t bestCandidateIndex=0;
std::vector<std::vector<float>> candidateWeights(candidates.size());
for(auto &w: candidateWeights) {
w.resize(outputs);
for(auto &output: w) {
output = fabs(_distribution(_generator));
}
}
std::vector<float>candidateScores(candidates.size());
//compute Correlation Epoch
do {
std::fill(candidateScores.begin(),candidateScores.end(),0.0);
lastCorrelation = bestCorrelation;
for(std::size_t patternIndex=0;patternIndex<patternsForOutput.size();patternIndex++) {
std::size_t candidateIndex=0;
auto &pattern = patternsForOutput[patternIndex];
auto net = patternNets[patternIndex];
Learning::BackPropagation bp(*net);
for(auto &candidate : candidates) {
float score = 0;
(*net)[1][1].setWeights(candidate->getWeights());
for(std::size_t outputNeuron=0;outputNeuron<outputs;outputNeuron++) {
(*net)[2][outputNeuron+1].weight(1)=candidateWeights[candidateIndex][outputNeuron];
}
bp.teach(pattern.first,pattern.second);
auto res = net->computeOutput(pattern.first);
for(std::size_t outputNeuron=0;outputNeuron<outputs;outputNeuron++) {
candidateWeights[candidateIndex][outputNeuron]=(*net)[2][outputNeuron+1].weight(1);
candidateScores[candidateIndex]+=res[outputNeuron]*res[outputNeuron];
}
candidate->setWeights((*net)[1][1].getWeights());
candidateIndex++;
}
}
bestCorrelation=candidateScores[0];
bestCandidateIndex=0;
for(std::size_t index=1;index < candidateScores.size();index++) {
if(bestCorrelation > candidateScores[index]) {
bestCandidateIndex = index;
}
}
if(bestCorrelation <= lastCorrelation) {
iterationsWithoutIprovement++;
}
}
while(iterations++ < _maxCandidateIterations && iterationsWithoutIprovement < _maxCandidateIterationsWithoutChange);
std::cout << "iter: " << iterations << ", correlation: " << bestCorrelation << ", " << lastCorrelation << "\n";
for(auto &net:patternNets) {
delete net;
}
return {candidates[bestCandidateIndex], candidateWeights[bestCandidateIndex]};
}
*/