2016-05-07 10:59:54 +02:00
parent 58f7f8f69b
commit 47484fc45c
3 changed files with 280 additions and 9 deletions

View File

@@ -1,6 +1,7 @@
#pragma once
#include "../Network.h"
+#include <random>
namespace NeuralNetwork {
namespace Cascade {
@@ -10,7 +11,7 @@ namespace NeuralNetwork {
* @brief Constructor for Network
* @param _inputSize is number of inputs to network
*/
-Network(std::size_t inputSize, std::size_t outputSize) : NeuralNetwork::Network(inputSize, outputSize) {
+Network(std::size_t inputSize, std::size_t outputSize, const ActivationFunction::ActivationFunction &activationFunction = ActivationFunction::Sigmoid(-4.9)) : NeuralNetwork::Network(inputSize, outputSize) {
_neurons.push_back(std::make_shared<BiasNeuron>());
for(std::size_t i = 0; i < inputSize; i++) {
@@ -18,7 +19,7 @@ namespace NeuralNetwork {
}
for(std::size_t i = 0; i < outputSize; i++) {
-_neurons.push_back(std::make_shared<Neuron>(_neurons.size()));
+_neurons.push_back(std::make_shared<Neuron>(_neurons.size(), activationFunction));
_neurons.back()->setInputSize(inputSize + 1); // +1 is bias
}
}
@@ -29,8 +30,8 @@ namespace NeuralNetwork {
compute[0] = 1.0;
-for(std::size_t i = 1; i <= _inputs; i++) {
-compute[i] = input[i - 1];
+for(std::size_t i = 0; i < _inputs; i++) {
+compute[i + 1] = input[i];
}
// 0 is bias, 1-_inputSize is input
@@ -45,14 +46,22 @@ namespace NeuralNetwork {
return _neurons.size();
}
const std::vector<std::shared_ptr<NeuronInterface>>& getNeurons() {
return _neurons;
}
std::shared_ptr<NeuronInterface> getNeuron(std::size_t id) {
return _neurons[id];
}
std::vector<std::shared_ptr<NeuronInterface>> getOutputNeurons() {
return std::vector<std::shared_ptr<NeuronInterface>>(_neurons.end()-_outputs,_neurons.end());
}
std::shared_ptr<NeuronInterface> addNeuron() {
_neurons.push_back(std::make_shared<Neuron>());
auto neuron = _neurons.back();
-neuron->setInputSize(_neurons.size() - _outputs);
+neuron->setInputSize(_neurons.size() - _outputs - 1);
// 0 is bias, 1-_inputSize is input
std::size_t maxIndexOfNeuron = _neurons.size() - 1;
// move output to right position
@@ -93,12 +102,13 @@ namespace NeuralNetwork {
return std::unique_ptr<Network>(net);
}
// neuron storage order, e.g. I I H H O O (2 inputs, 2 hidden, 2 outputs = 6 neurons; bias at index 0 not shown)
void randomizeWeights() {
-for(std::size_t neuron = _neurons.size() - _outputs; neuron < _neurons.size(); neuron++) {
-for(std::size_t weight = 0; weight < _neurons.size() - _outputs; weight++) {
-_neurons[neuron]->weight(weight) = 1.0 - static_cast<float>(rand() % 2001) / 1000.0;
+std::mt19937 _generator(rand());
+std::uniform_real_distribution<> _distribution(-0.3, 0.3);
+for(auto& neuron : getOutputNeurons()) {
+for(std::size_t weight = 0; weight < neuron->getWeights().size(); weight++) {
+neuron->weight(weight) = _distribution(_generator);
}
}
}
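Taken together, these hunks add a configurable activation function to the constructor and replace the rand()-based weight initialization with a uniform distribution. A minimal usage sketch, assuming the header installs as <NeuralNetwork/Cascade/Network.h> (the path is inferred from the include in the third file below; the class and member names come from the diff):

#include <NeuralNetwork/Cascade/Network.h> // include path is an assumption
#include <vector>

int main() {
    // 2 inputs, 1 output; the third parameter is the newly added optional
    // activation function (defaults to Sigmoid(-4.9) per the diff above).
    NeuralNetwork::Cascade::Network net(2, 1, NeuralNetwork::ActivationFunction::Sigmoid(-4.9));
    net.randomizeWeights(); // output weights now drawn uniformly from (-0.3, 0.3)
    std::vector<float> out = net.computeOutput({0.0f, 1.0f});
    return out.empty() ? 1 : 0;
}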

View File

@@ -0,0 +1,128 @@
#pragma once
#include "../Cascade/Network.h"
#include "../FeedForward/Network.h"
#include "../Learning/QuickPropagation.h"
#include "../ActivationFunction/Tangents.h"
#include <random>
#include <algorithm>
#include <iostream> // std::cout is used in construct()
#include <utility> // std::pair
namespace NeuralNetwork {
namespace ConstructiveAlgorithms {
class CascadeCorrelation {
typedef std::pair<std::vector<float>, std::vector<float>> TrainingPattern;
public:
CascadeCorrelation(std::size_t numberOfCandidates = 2, float maxError = 0.7) :
_maxError(maxError), _weightRange(1), _numberOfCandidates(numberOfCandidates), _generator(rand()), _distribution() {
setWeightRange(_weightRange);
}
Cascade::Network construct(const std::vector<TrainingPattern> &patterns) {
std::size_t inputs = patterns[0].first.size();
std::size_t outputs = patterns[0].second.size();
Cascade::Network network(inputs, outputs, NeuralNetwork::ActivationFunction::Tangents());
network.randomizeWeights();
int step = 0;
float error = trainOutputs(network, patterns);
while(step++ < 20 && error > _maxError) {
std::shared_ptr<Neuron> candidate = createCandidate(network.getNeuronSize() - outputs);
trainCandidates(network, candidate, patterns);
addBestCandidate(network, candidate);
error = trainOutputs(network, patterns);
}
std::cout << step << ": " << error << "\n";
return network;
}
std::size_t getNumberOfCandidates() const {
return _numberOfCandidates;
}
void setNumberOfCandidates(std::size_t numberOfCandidates) {
_numberOfCandidates = numberOfCandidates;
}
float getWeightRange() const {
return _weightRange;
}
void setWeightRange(float weightRange) {
_weightRange = weightRange;
_distribution = std::uniform_real_distribution<>(-weightRange, weightRange);
}
protected:
float _minimalErrorStep = 0.0005;
float _maxError;
float _weightRange;
std::size_t _numberOfCandidates;
std::mt19937 _generator;
std::uniform_real_distribution<> _distribution;
private:
std::vector<float> getInnerNeuronsOutput(Cascade::Network &network, const std::vector<float> &input) {
std::vector<float> output = network.computeOutput(input);
std::vector<float> outputOfUnits(network.getNeuronSize() - output.size());
outputOfUnits[0] = 1.0;
for(std::size_t i = 0; i < input.size(); i++) {
outputOfUnits[i + 1] = input[i];
}
for(std::size_t i = input.size() + 1; i < network.getNeuronSize() - output.size(); i++) {
outputOfUnits[i] = network.getNeuron(i)->output();
}
return outputOfUnits;
}
float trainOutputs(Cascade::Network &network, const std::vector<TrainingPattern> &patterns);
void trainCandidates(Cascade::Network &network, std::shared_ptr<Neuron> &candidate, const std::vector<TrainingPattern> &patterns);
void addBestCandidate(Cascade::Network &network, const std::shared_ptr<Neuron> &candidate) {
auto neuron = network.addNeuron();
//auto tmp = candidate->getWeights();
//std::fill(tmp.begin(),tmp.end(),0.2);
//neuron->setWeights(tmp);
neuron->setWeights(candidate->getWeights());
neuron->setActivationFunction(candidate->getActivationFunction());
for(auto &n :network.getOutputNeurons()) {
auto weights = n->getWeights();
for(auto& weight: weights) {
weight *=0.7;
}
weights[weights.size()-1] = _distribution(_generator);
n->setWeights(weights);
//n->weight(n->getWeights().size() - 1) = _distribution(_generator);
//n->weight(n->getWeights().size() - 1) = 0.2;//.distribution(_generator);
}
}
std::shared_ptr<Neuron> createCandidate(std::size_t id) { //TODO
std::vector<std::shared_ptr<Neuron>> candidates;
for(std::size_t i = 0; i < _numberOfCandidates; i++) {
candidates.push_back(std::make_shared<Neuron>(id));
candidates.back()->setInputSize(id);
candidates.back()->setActivationFunction(NeuralNetwork::ActivationFunction::Tangents());
for(std::size_t weightIndex = 0; weightIndex < id; weightIndex++) {
candidates.back()->weight(weightIndex) = _distribution(_generator);
// candidates.back()->weight(weightIndex) = 0.1;//_distribution(_generator);
}
}
return candidates[0]; // TODO: evaluate all candidates and return the best; for now the first one is used
}
};
}
}
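A usage sketch for the class above, training XOR; the hyperparameters here are illustrative, not values taken from the commit:

#include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
#include <utility>
#include <vector>

int main() {
    using NeuralNetwork::ConstructiveAlgorithms::CascadeCorrelation;
    // TrainingPattern is a pair of <inputs, expected outputs>
    std::vector<std::pair<std::vector<float>, std::vector<float>>> patterns = {
        {{0, 0}, {0}}, {{0, 1}, {1}}, {{1, 0}, {1}}, {{1, 1}, {0}},
    };
    CascadeCorrelation cc(4, 0.1f); // 4 candidates per step, target error 0.1
    auto network = cc.construct(patterns); // grows hidden units until the error bound or 20 steps
    return 0;
}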

View File

@@ -0,0 +1,133 @@
#include <NeuralNetwork/ConstructiveAlgorithms/CascadeCorrelation.h>
#include <cmath> // pow
#include <iostream> // std::cout
#include <limits> // std::numeric_limits
#include <numeric> // std::accumulate
using namespace NeuralNetwork::ConstructiveAlgorithms;
float CascadeCorrelation::trainOutputs(Cascade::Network &network, const std::vector<CascadeCorrelation::TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size();
FeedForward::Network p(network.getNeuronSize() - outputs-1);
p.appendLayer(outputs);
Learning::QuickPropagation learner(p);
learner.setLearningCoefficient(0.9);
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
p[1][neuron+1].setWeights(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getWeights());
p[1][neuron+1].setActivationFunction(network.getNeuron(network.getNeuronSize() - outputs + neuron)->getActivationFunction());
}
std::cout << p.stringify() << "\n";
std::vector<TrainingPattern> patternsForOutput;
for(auto &pattern:patterns) {
patternsForOutput.emplace_back(getInnerNeuronsOutput(network,pattern.first), pattern.second);
}
float lastError = std::numeric_limits<float>::max();
float error = std::numeric_limits<float>::max();
std::size_t iteration = 0;
std::size_t iterWithoutImprovement = 0;
do {
lastError=error;
for(auto &pattern:patternsForOutput) {
// element 0 of the stored activations is the bias unit, so skip it
learner.teach({pattern.first.begin() + 1, pattern.first.end()}, pattern.second);
}
error = 0;
for(auto &pattern:patternsForOutput) {
// skip the bias element here as well so the input size matches p
std::vector<float> output = p.computeOutput({pattern.first.begin() + 1, pattern.first.end()});
for(std::size_t outputIndex = 0; outputIndex < output.size(); outputIndex++) {
error += pow(output[outputIndex] - pattern.second[outputIndex],2);
}
}
if(lastError - error < _minimalErrorStep) {
iterWithoutImprovement++;
} else {
iterWithoutImprovement = 0;
}
} while (iteration++ < 500 && iterWithoutImprovement < 300);
std::cout << "iter: " << iteration << ", error: " << error << ", " << (lastError-error) << "\n";
for(std::size_t neuron = 0; neuron < outputs; neuron++) {
network.getNeuron(network.getNeuronSize() - outputs + neuron)->setWeights(p[1][neuron+1].getWeights());
}
return error;
}
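// The value returned above is the plain summed squared error over all
// patterns and outputs,
//   E = sum_p sum_o (y[p][o] - t[p][o])^2,
// and the training loop stops once E fails to drop by _minimalErrorStep for
// 300 consecutive passes, or after 500 passes in total.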
void CascadeCorrelation::trainCandidates(Cascade::Network &network, std::shared_ptr<Neuron> &candidate, const std::vector<TrainingPattern> &patterns) {
std::size_t outputs = patterns[0].second.size();
std::vector<TrainingPattern> patternsForOutput;
for(auto &pattern:patterns) {
patternsForOutput.emplace_back(getInnerNeuronsOutput(network, pattern.first), pattern.second);
}
std::vector<std::vector<float>> errors (patterns.size());
std::vector<float> meanErrors (outputs);
for(std::size_t patternNumber = 0; patternNumber < patterns.size(); patternNumber++) {
auto &pattern=patterns[patternNumber];
errors[patternNumber].resize(network.outputs());
std::vector<float> output = network.computeOutput(patterns[patternNumber].first);
for(std::size_t outputIndex = 0; outputIndex < network.outputs(); outputIndex++) {
float error = pow(pattern.second[outputIndex] - output[outputIndex],2);
errors[patternNumber][outputIndex]=error;
meanErrors[outputIndex] += error;
}
}
std::for_each(meanErrors.begin(), meanErrors.end(), [&patterns](float &n){ n/=patterns.size(); });
std::size_t iterations=0;
std::size_t iterationsWithoutImprovement = 0;
float lastCorrelations=0;
float correlation=std::numeric_limits<float>::max();
do {
lastCorrelations=correlation;
std::vector<float> activations;
std::vector<float> correlations(errors[0].size());
std::vector<float> correlationSigns(errors[0].size());
for(auto &pattern:patternsForOutput) {
activations.push_back((*candidate)(pattern.first));
}
for(std::size_t err=0;err<meanErrors.size();err++) {
for(std::size_t activ=0;activ<activations.size();activ++) {
correlations[err] += activations[activ] * (errors[activ][err] - meanErrors[err]);
}
correlationSigns[err] = correlations[err] > 0? 1.0 : -1.0;
}
correlation = std::accumulate(correlations.begin(), correlations.end(),0.0);
std::vector<float> derivatives(candidate->getWeights().size());
for (std::size_t input=0;input<candidate->getWeights().size();input++) {
float dcdw = 0.0;
for(std::size_t err=0;err<errors.size();err++) {
float thetaO = 0.0;
for(std::size_t meanError = 0; meanError < meanErrors.size(); meanError++) {
(*candidate)(patternsForOutput[err].first);
float derivative = candidate->getActivationFunction().derivatedOutput(candidate->value(), candidate->output());
thetaO += correlationSigns[meanError] * (errors[err][meanError] - meanErrors[meanError]) * derivative * candidate->weight(input);
}
dcdw += thetaO;
}
derivatives[input]=dcdw;
}
for(std::size_t weightIndex = 0; weightIndex < derivatives.size(); weightIndex++) {
candidate->weight(weightIndex) += derivatives[weightIndex]*0.1;
}
if(correlation + 0.0001 <= lastCorrelations) {
iterationsWithoutImprovement++;
} else {
iterationsWithoutImprovement = 0;
}
std::cout << correlation << "\n";
} while (iterations++ < 200 && iterationsWithoutImprovement < 3);
}
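For reference, the update rule in trainCandidates is modelled on the candidate correlation from Fahlman and Lebiere's cascade-correlation algorithm; the textbook formulation (stated here for orientation, not as a claim about this exact code) is

$$S = \sum_{o}\Bigl|\sum_{p}\bigl(V_{p}-\overline{V}\bigr)\bigl(E_{p,o}-\overline{E}_{o}\bigr)\Bigr|,\qquad \frac{\partial S}{\partial w_{i}} = \sum_{o}\sum_{p}\sigma_{o}\,\bigl(E_{p,o}-\overline{E}_{o}\bigr)\,f'_{p}\,I_{i,p},$$

where $V_p$ is the candidate's activation on pattern $p$, $E_{p,o}$ the residual error at output $o$, $\overline{E}_o$ its mean over patterns, $\sigma_o$ the sign of the inner sum, $f'_p$ the derivative of the activation function, and $I_{i,p}$ the value arriving on the candidate's input $i$. Two deviations are visible in the loop above: the activations are not mean-centered, and the inner derivative multiplies by candidate->weight(input) where the reference formula uses the incoming value $I_{i,p}$. Gradient ascent then steps each weight by 0.1 times its derivative.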