learning: naming in bp changed and qp modified
@@ -16,7 +16,7 @@ namespace Learning {

     public:
         inline BackPropagation(FeedForward::Network &feedForwardNetwork, CorrectionFunction::CorrectionFunction *correction = new CorrectionFunction::Linear()):
-            network(feedForwardNetwork), correctionFunction(correction),learningCoefficient(0.4), deltas() {
+            network(feedForwardNetwork), correctionFunction(correction),learningCoefficient(0.4), slopes() {
             resize();
         }

@@ -34,24 +34,26 @@ namespace Learning {
     protected:

         virtual inline void resize() {
-            if(deltas.size()!=network.size())
-                deltas.resize(network.size());
+            if(slopes.size()!=network.size())
+                slopes.resize(network.size());

             for(std::size_t i=0; i < network.size(); i++) {
-                if(deltas[i].size()!=network[i].size())
-                    deltas[i].resize(network[i].size());
+                if(slopes[i].size()!=network[i].size())
+                    slopes[i].resize(network[i].size());
             }
         }

         virtual void updateWeights(const std::vector<float> &input);

+        virtual void computeDeltas(const std::vector<float> &expectation);
+
         FeedForward::Network &network;

         CorrectionFunction::CorrectionFunction *correctionFunction;

         float learningCoefficient;

-        std::vector<std::vector<float>> deltas;
+        std::vector<std::vector<float>> slopes;
     };
 }
 }
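The rename from deltas to slopes lines the base class up with quickprop terminology: Fahlman's quickprop formulation reasons about the error slope S(t) against S(t-1), and the QuickPropagation subclass below stores exactly that history. The lazy resize() guard keeps the buffer as a jagged vector mirroring the network's layer sizes, so repeated teach() calls reallocate nothing once the shape is stable. A minimal sketch of that idiom, assuming a plain layer-sizes stand-in for FeedForward::Network:

// Minimal sketch of the lazy, shape-mirroring resize idiom used above.
// Topology is a hypothetical stand-in for FeedForward::Network.
#include <cstddef>
#include <vector>

using Topology = std::vector<std::size_t>; // neurons per layer (assumed)

void resizeLike(std::vector<std::vector<float>> &buffer, const Topology &net) {
    if (buffer.size() != net.size())
        buffer.resize(net.size());          // one slot per layer
    for (std::size_t i = 0; i < net.size(); i++) {
        if (buffer[i].size() != net[i])
            buffer[i].resize(net[i]);       // one slot per neuron
    }
}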
@@ -16,7 +16,7 @@ namespace NeuralNetwork {

     public:
         inline QuickPropagation(FeedForward::Network &feedForwardNetwork, CorrectionFunction::CorrectionFunction *correction = new CorrectionFunction::Linear()):
-            BackPropagation(feedForwardNetwork,correction),deltasPrev() {
+            BackPropagation(feedForwardNetwork,correction),previousSlopes() {
             resize();
         }

@@ -24,32 +24,49 @@ namespace NeuralNetwork {
         }

     protected:
+        float _maxChange=1.75;
+        float _epsilon=0.5;

         virtual inline void resize() override {
-            if(deltas.size()!=network.size())
-                deltas.resize(network.size());
+            if(slopes.size()!=network.size())
+                slopes.resize(network.size());

             for(std::size_t i=0; i < network.size(); i++) {
-                if(deltas[i].size()!=network[i].size())
-                    deltas[i].resize(network[i].size());
+                if(slopes[i].size()!=network[i].size())
+                    slopes[i].resize(network[i].size());
             }

-            if(deltasPrev.size()!=network.size())
-                deltasPrev.resize(network.size());
+            if(previousSlopes.size()!=network.size())
+                previousSlopes.resize(network.size());

             for(std::size_t i=0; i < network.size(); i++) {
-                if(deltasPrev[i].size()!=network[i].size())
-                    deltasPrev[i].resize(network[i].size());
+                if(previousSlopes[i].size()!=network[i].size())
+                    previousSlopes[i].resize(network[i].size());

-                for(std::size_t j=0; j < deltasPrev[i].size(); j++) {
-                    deltasPrev[i][j]=1.0;
+                for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
+                    previousSlopes[i][j]=1.0;
                 }
             }
+
+            if(lastWeightChange.size()!=network.size())
+                lastWeightChange.resize(network.size());
+
+            for(std::size_t i=0; i < network.size(); i++) {
+                if(lastWeightChange[i].size()!=network[i].size())
+                    lastWeightChange[i].resize(network[i].size());
+
+                for(std::size_t j=0; j < previousSlopes[i].size(); j++) {
+                    lastWeightChange[i][j]=1.0;
+                }
+            }
+            weightChange= lastWeightChange;
         }

         virtual void updateWeights(const std::vector<float> &input) override;

-        std::vector<std::vector<float>> deltasPrev;
+        std::vector<std::vector<float>> previousSlopes ={};
+        std::vector<std::vector<float>> lastWeightChange ={};
+        std::vector<std::vector<float>> weightChange ={};
     };
 }
 }
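previousSlopes and lastWeightChange are seeded with 1.0 rather than zero, presumably because the quickprop step in updateWeights divides by (previousSlope - slope) and scales by the last change: an all-zero history would collapse or poison the very first step. A small demonstration of the hazard, with illustrative values:

// Why a zero-initialised history breaks the first quickprop step.
// The numbers are illustrative; the formula mirrors the commit.
#include <cstdio>

int main() {
    float slope = 0.3f;          // current error slope of one weight
    float previousSlope = 0.0f;  // zero history...
    float lastChange = 0.0f;     // ...and no recorded weight change

    // secant step: slope / (previousSlope - slope) * lastChange
    float step = slope / (previousSlope - slope) * lastChange;
    std::printf("%f\n", step);   // -0.000000: the step collapses; with
                                 // previousSlope == slope it would be NaN
}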
@@ -9,30 +9,12 @@ void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &i

     resize();

-    auto& outputLayer=network[network.size()-1];
-    for(std::size_t j=1;j<outputLayer.size();j++) {
-        auto& neuron = outputLayer[j];
-        deltas[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
-            neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
-    }
-
-    for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
-        auto &layer=network[layerIndex];
-
-        for(std::size_t j=1;j<layer.size();j++) {
-            float deltasWeight = 0;
-
-            for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
-                deltasWeight+=deltas[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
-            }
-
-            deltas[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
-        }
-    }
+    computeDeltas(expectation);

     updateWeights(input);
 }


 void NeuralNetwork::Learning::BackPropagation::updateWeights(const std::vector<float> &input) {

     for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
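The inline delta computation that used to live in teach() has moved into the new computeDeltas() virtual, so teach() reduces to resize, compute, update. The payoff is that QuickPropagation can override only the weight-update phase while reusing the shared slope computation. A reduced sketch of the resulting control flow (the class skeleton here is assumed; only the phase names come from the diff):

// Template-method split introduced by this commit, in miniature.
#include <vector>

struct Trainer {
    virtual ~Trainer() = default;

    void teach(const std::vector<float> &input,
               const std::vector<float> &expectation) {
        computeDeltas(expectation); // phase 1: shared slope computation
        updateWeights(input);       // phase 2: rule-specific weight update
    }

    virtual void computeDeltas(const std::vector<float> &expectation) = 0;
    virtual void updateWeights(const std::vector<float> &input) = 0;
};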
@@ -44,18 +26,43 @@ void NeuralNetwork::Learning::BackPropagation::updateWeights(const std::vector<f

         for(std::size_t j=1;j<layerSize;j++) {

-            deltas[layerIndex][j]*=learningCoefficient;
+            float delta =slopes[layerIndex][j]*learningCoefficient;

-            layer[j].weight(0)+=deltas[layerIndex][j];
+            layer[j].weight(0)+=delta;

             for(std::size_t k=1;k<prevLayerSize;k++) {
                 if(layerIndex==1) {
-                    layer[j].weight(k)+=deltas[layerIndex][j]*input[k-1];
+                    layer[j].weight(k)+=delta*input[k-1];
                 } else {
-                    layer[j].weight(k)+=deltas[layerIndex][j]*prevLayer[k].output();
+                    layer[j].weight(k)+=delta*prevLayer[k].output();
                 }
             }
         }
     }

 }

+
+void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &expectation) {
+    auto& outputLayer=network[network.size()-1];
+    for(std::size_t j=1;j<outputLayer.size();j++) {
+        auto& neuron = outputLayer[j];
+        slopes[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
+            neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
+    }
+
+    for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
+        auto &layer=network[layerIndex];
+
+        for(std::size_t j=1;j<layer.size();j++) {
+            float deltasWeight = 0;
+
+            for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
+                deltasWeight+=slopes[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
+            }
+
+            slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
+        }
+    }
+}
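The moved computeDeltas() is the textbook backpropagation recurrence: an output neuron's slope is the correction term times the activation derivative, and a hidden neuron's slope is the sum of the next layer's slopes weighted by its outgoing weights, again times the local derivative. The loops start at index 1 because slot 0 apparently holds the bias unit, which is also why expectation[j-1] is shifted by one. A toy check of the hidden-layer recurrence with made-up numbers:

// Toy check of the hidden-layer slope recurrence; all values are made up.
#include <cstdio>

int main() {
    float nextSlopes[] = {0.2f, -0.1f}; // slopes[layerIndex+1][k]
    float outWeights[] = {0.5f,  0.3f}; // network[layerIndex+1][k].weight(j)
    float derivative   = 0.25f;         // derivatedOutput(value, output)

    float sum = 0;
    for (int k = 0; k < 2; k++)
        sum += nextSlopes[k] * outWeights[k]; // 0.2*0.5 + (-0.1)*0.3 = 0.07
    std::printf("%f\n", sum * derivative);    // 0.07 * 0.25 = 0.0175
}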
@@ -14,22 +14,29 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<

         for(std::size_t j=1;j<layerSize;j++) {

-            //TODO: is this correct??
-            float delta=deltas[layerIndex][j]/(deltasPrev[layerIndex][j]-deltas[layerIndex][j]);
-
-            deltas[layerIndex][j]=delta;
-
-            layer[j].weight(0)+=delta;
+            float newChange=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * lastWeightChange[layerIndex][j];
+
+            // according to original paper
+            newChange+= slopes[layerIndex][j]*_epsilon;
+
+            if(newChange > lastWeightChange[layerIndex][j]*_maxChange) {
+                newChange=lastWeightChange[layerIndex][j];
+            }
+
+            weightChange[layerIndex][j]=newChange;
+
+            layer[j].weight(0)+=newChange;

             for(std::size_t k=1;k<prevLayerSize;k++) {
                 if(layerIndex==1) {
-                    layer[j].weight(k)+=delta*input[k-1];
+                    layer[j].weight(k)+=newChange*(input[k-1]);
                 } else {
-                    layer[j].weight(k)+=delta*prevLayer[k].output();
+                    layer[j].weight(k)+=newChange*(prevLayer[k].output());
                 }
             }
         }
     }

-    deltas.swap(deltasPrev);
+    slopes.swap(previousSlopes);
+    weightChange.swap(lastWeightChange);
 }
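This answers the removed TODO: the old code divided the slope by the slope difference but never multiplied by the previous weight change. The rewritten step matches Fahlman's quickprop rule, delta_w(t) = S(t) / (S(t-1) - S(t)) * delta_w(t-1), plus an explicit gradient term scaled by _epsilon, with growth capped through _maxChange (1.75 is the maximum growth factor suggested in Fahlman's paper). One step in isolation, with illustrative numbers; note that the commit's cap falls back to the previous change rather than clamping to the cap value:

// One quickprop step for a single weight, mirroring the commit's logic.
// Inputs are illustrative.
#include <cstdio>

float quickpropStep(float slope, float previousSlope, float lastChange,
                    float epsilon, float maxChange) {
    // secant jump toward the minimum of a parabola fitted through the
    // current and previous slope
    float change = slope / (previousSlope - slope) * lastChange;
    change += slope * epsilon;            // explicit gradient term
    if (change > lastChange * maxChange)  // cap runaway growth
        change = lastChange;              // fall back, as in the commit
    return change;
}

int main() {
    // slope halved since the last step: the parabola predicts the minimum
    // one more step of the same size away
    float step = quickpropStep(0.5f, 1.0f, 0.1f, 0.1f, 1.75f);
    std::printf("%f\n", step); // 0.1 + 0.05 = 0.150000, under the 0.175 cap
}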