refactored propagation
src/NeuralNetwork/Learning/BackPropagation.cpp
@@ -1,89 +1,23 @@
#include <NeuralNetwork/Learning/BackPropagation.h>

#include <cassert>
#include <immintrin.h>

-void NeuralNetwork::Learning::BackPropagation::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
-    network.computeOutput(input);
-    resize();
-    computeSlopes(expectation);
-
-    computeDeltas(input);
-    if(++currentBatchSize >= batchSize) {
-        updateWeights();
-        endBatch();
-        currentBatchSize=0;
-    }
-}
-
-void NeuralNetwork::Learning::BackPropagation::computeSlopes(const std::vector<float> &expectation) {
-    auto& outputLayer=network[network.size()-1];
-    for(std::size_t j=1;j<outputLayer.size();j++) {
-        auto& neuron = outputLayer[j];
-        slopes[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
-                neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
-    }
-
-    for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
-        auto &layer=network[layerIndex];
-
-        for(std::size_t j=1;j<layer.size();j++) {
-            float deltasWeight = 0;
-
-            for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
-                deltasWeight+=slopes[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
-            }
-
-            slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
-        }
-    }
-}
-
-void NeuralNetwork::Learning::BackPropagation::computeDeltas(const std::vector<float> &input) {
-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer=network[layerIndex];
-        auto &prevLayer=network[layerIndex-1];
-
-        std::size_t prevLayerSize=prevLayer.size();
-        std::size_t layerSize=layer.size();
-
-        for(std::size_t j=1;j<layerSize;j++) {
-            float update = slopes[layerIndex][j];
-            for(std::size_t k=0;k<prevLayerSize;k++) {
-                float inputValue = 0.0;
-                if(layerIndex==1 && k!=0) {
-                    inputValue = input[k-1];
-                } else {
-                    inputValue= prevLayer[k].output();
-                }
-                if(currentBatchSize == 0) {
-                    deltas[layerIndex][j][k] = update * inputValue;
-                } else {
-                    deltas[layerIndex][j][k] += update * inputValue;
-                }
-            }
-        }
-    }
-}
-
-void NeuralNetwork::Learning::BackPropagation::updateWeights() {
+void NeuralNetwork::Learning::BackPropagation::updateWeightsAndEndBatch() {

    bool enableMoments = momentumWeight > 0.0;

-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer = network[layerIndex];
-        auto &prevLayer = network[layerIndex - 1];
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];

        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();

        for(std::size_t j = 1; j < layerSize; j++) {
            for(std::size_t k = 0; k < prevLayerSize; k++) {
-                float delta = deltas[layerIndex][j][k]*learningCoefficient - weightDecay * layer[j].weight(k);
+                float delta = _gradients[layerIndex][j][k]*learningCoefficient - weightDecay * layer[j].weight(k);

                if(enableMoments) {
-                    delta += momentumWeight * lastDeltas[layerIndex][j][k];
-                    lastDeltas[layerIndex][j][k]=delta;
+                    delta += momentumWeight * _lastDeltas[layerIndex][j][k];
+                    _lastDeltas[layerIndex][j][k]=delta;
                }

                layer[j].weight(k)+= delta;
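The hunk above is cut off before the closing braces. Read together with the removed lines, the refactored BackPropagation.cpp appears to shrink to just the momentum/weight-decay update. The following is a sketch inferred from the added (+) lines, not the verbatim committed file; the remaining closing braces and the include list are assumptions:

#include <NeuralNetwork/Learning/BackPropagation.h>

void NeuralNetwork::Learning::BackPropagation::updateWeightsAndEndBatch() {
    bool enableMoments = momentumWeight > 0.0;

    for(std::size_t layerIndex = 1; layerIndex < _network.size(); layerIndex++) {
        auto &layer = _network[layerIndex];
        auto &prevLayer = _network[layerIndex - 1];

        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();

        for(std::size_t j = 1; j < layerSize; j++) {
            for(std::size_t k = 0; k < prevLayerSize; k++) {
                // batch-accumulated gradient scaled by the learning rate, minus weight decay
                float delta = _gradients[layerIndex][j][k] * learningCoefficient - weightDecay * layer[j].weight(k);

                if(enableMoments) {
                    // classical momentum: reuse and store the previous weight change
                    delta += momentumWeight * _lastDeltas[layerIndex][j][k];
                    _lastDeltas[layerIndex][j][k] = delta;
                }

                layer[j].weight(k) += delta;
            }
        }
    }
}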
src/NeuralNetwork/Learning/BatchPropagation.cpp (new file, 92 lines)
@@ -0,0 +1,92 @@
#include <NeuralNetwork/Learning/BatchPropagation.h>

void NeuralNetwork::Learning::BatchPropagation::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
    _network.computeOutput(input);
    if(!init) {
        resize();
        init = true;
    }

    computeSlopes(expectation);

    computeDeltas(input);
    if(++_currentBatchSize >= _batchSize) {
        finishTeaching();
    }
}

void NeuralNetwork::Learning::BatchPropagation::finishTeaching() {
    updateWeightsAndEndBatch();
    _currentBatchSize=0;
}

void NeuralNetwork::Learning::BatchPropagation::computeSlopes(const std::vector<float> &expectation) {
    const auto& outputLayer=_network[_network.size()-1];
    for(std::size_t j=1;j<outputLayer.size();j++) {
        const auto& neuron = outputLayer[j];
        _slopes[_network.size()-1][j]=_correctionFunction->operator()( expectation[j-1], neuron.output())*
                neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
    }

    for(int layerIndex=static_cast<int>(_network.size()-2);layerIndex>0;layerIndex--) {
        auto &layer=_network[layerIndex];

        for(std::size_t j=1;j<layer.size();j++) {
            float deltasWeight = 0;

            for(std::size_t k=1;k<_network[layerIndex+1].size();k++) {
                deltasWeight+=_slopes[layerIndex+1][k]* _network[layerIndex+1][k].weight(j);
            }

            _slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
        }
    }
}

void NeuralNetwork::Learning::BatchPropagation::computeDeltas(const std::vector<float> &input) {
    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
        auto &layer=_network[layerIndex];
        auto &prevLayer=_network[layerIndex-1];

        std::size_t prevLayerSize=prevLayer.size();
        std::size_t layerSize=layer.size();

        for(std::size_t j=1;j<layerSize;j++) {
            float update = _slopes[layerIndex][j];
            for(std::size_t k=0;k<prevLayerSize;k++) {
                float inputValue = 0.0;
                if(layerIndex==1 && k!=0) {
                    inputValue = input[k-1];
                } else {
                    inputValue= prevLayer[k].output();
                }
                if(_currentBatchSize == 0) {
                    _gradients[layerIndex][j][k] = update * inputValue;
                } else {
                    _gradients[layerIndex][j][k] += update * inputValue;
                }
            }
        }
    }
}

void NeuralNetwork::Learning::BatchPropagation::resize() {
    _slopes.resize(_network.size());

    for(std::size_t i=0; i < _network.size(); i++) {
        _slopes[i].resize(_network[i].size());
    }

    _gradients.resize(_network.size());

    for(std::size_t i = 0; i < _network.size(); i++) {
        _gradients[i].resize(_network[i].size());
        if(i > 0) {
            for(std::size_t j = 0; j < _gradients[i].size(); j++) {
                _gradients[i][j].resize(_network[i - 1].size());
                std::fill(_gradients[i][j].begin(), _gradients[i][j].end(), 0.0);
            }
        }
    }

}
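For orientation, this is roughly the base-class interface that BatchPropagation.cpp implies: the shared teach/slope/gradient logic lives here, and the concrete learners only supply the weight update. It is a minimal sketch, not the committed header; the network and correction-function types, the constructor, virtual-ness, and access specifiers are assumptions, and only the member and method names are taken from the code above.

// Hypothetical reconstruction of NeuralNetwork/Learning/BatchPropagation.h (sketch only).
#pragma once

#include <cstddef>
#include <vector>

namespace NeuralNetwork {

class Network;              // placeholder name for the real network type
namespace Learning {

class CorrectionFunction;   // placeholder name for the real error-derivative functor

class BatchPropagation {
public:
    // One training sample: forward pass, slope and gradient accumulation,
    // then a weight update once _batchSize samples have been seen.
    void teach(const std::vector<float> &input, const std::vector<float> &expectation);
    void finishTeaching();

protected:
    virtual void updateWeightsAndEndBatch() = 0;                  // BackPropagation / RProp implement this
    virtual void computeDeltas(const std::vector<float> &input);  // QuickPropagation overrides this
    void computeSlopes(const std::vector<float> &expectation);
    void resize();                                                // lazily sizes _slopes and _gradients

    Network &_network;
    CorrectionFunction *_correctionFunction = nullptr;            // ownership model assumed
    std::vector<std::vector<float>> _slopes;                      // per layer, per neuron
    std::vector<std::vector<std::vector<float>>> _gradients;      // per layer, per neuron, per incoming weight
    std::size_t _batchSize = 1;
    std::size_t _currentBatchSize = 0;
    bool init = false;
};

}
}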
src/NeuralNetwork/Learning/QuickPropagation.cpp
@@ -3,7 +3,7 @@
#include <cassert>
#include <immintrin.h>

-void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<float> &input) {
+void NeuralNetwork::Learning::QuickPropagation::computeDeltas(const std::vector<float> &input) {

    float shrinkFactor=_maxChange/(_maxChange+1.0);

@@ -15,20 +15,39 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
        std::size_t layerSize=layer.size();

        for(std::size_t j=1;j<layerSize;j++) {
            for(std::size_t k=1;k<layerSize;k++) {
                double update = 0.0;

                double inputValue = 0.0;
                if(layerIndex==1) {
                    inputValue=input[k-1];
                } else {
                    inputValue=prevLayer[k].output();
                }

                if(currentBatchSize == 0) {
                    gradients[layerIndex][j][k] = update * inputValue;
                } else {
                    gradients[layerIndex][j][k] += update * inputValue;
                }

            }
            /*
            float newChange=0;

-           if(fabs (deltas[layerIndex][j])> 0.0001) {
-               if(std::signbit(deltas[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
+           if(fabs (gradients[layerIndex][j])> 0.0001) {
+               if(std::signbit(gradients[layerIndex][j]) == std::signbit(slopes[layerIndex][j])) {
                    newChange+= slopes[layerIndex][j]*_epsilon;

                    if(fabs(slopes[layerIndex][j]) > fabs(shrinkFactor * previousSlopes[layerIndex][j])) {
-                       newChange += _maxChange * deltas[layerIndex][j];
+                       newChange += _maxChange * gradients[layerIndex][j];
                    }else {
-                       newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
+                       newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
                    }
                } else {
-                   newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * deltas[layerIndex][j];
+                   newChange+=slopes[layerIndex][j]/(previousSlopes[layerIndex][j]-slopes[layerIndex][j]) * gradients[layerIndex][j];
                }
            } else {
                newChange+= slopes[layerIndex][j]*_epsilon;
@@ -45,9 +64,7 @@ void NeuralNetwork::Learning::QuickPropagation::updateWeights(const std::vector<
                layer[j].weight(k)+=newChange*(prevLayer[k].output());
            }
        }
            */
        }
    }

    slopes.swap(previousSlopes);
    weightChange.swap(deltas);
}
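The active part of computeDeltas above only accumulates gradients (with update still fixed at 0.0), while the QuickProp-specific step is parked in the commented-out block. That block roughly corresponds to the classic QuickProp rule: with S(t) the current slope, S(t-1) the previous slope, \Delta w(t-1) the previous change, \varepsilon the learning rate (_epsilon) and \mu the maximum growth factor (_maxChange),

\Delta w(t) \;=\; \varepsilon\, S(t) \;+\;
\begin{cases}
\mu\, \Delta w(t-1) & \text{if } |S(t)| > \tfrac{\mu}{\mu+1}\,|S(t-1)|,\\[4pt]
\dfrac{S(t)}{\,S(t-1)-S(t)\,}\, \Delta w(t-1) & \text{otherwise,}
\end{cases}

with the \varepsilon S(t) term used alone when the previous change is near zero, and the quadratic term used alone when the signs of the previous change and the current slope disagree. This is a reading of the disabled sketch, not of finished code.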
src/NeuralNetwork/Learning/RProp.cpp
@@ -1,101 +1,37 @@
#include <NeuralNetwork/Learning/RProp.h>

-void NeuralNetwork::Learning::RProp::teach(const std::vector<float> &input, const std::vector<float> &expectation) {
-    network.computeOutput(input);
-    resize();
-    computeSlopes(expectation);
+void NeuralNetwork::Learning::RProp::updateWeightsAndEndBatch() {
-
-    computeDeltas(input);
-    if(++currentBatchSize >= batchSize) {
-        updateWeights();
-        endBatch();
-        currentBatchSize=0;
-    }
-}
-
-void NeuralNetwork::Learning::RProp::computeSlopes(const std::vector<float> &expectation) {
-    auto& outputLayer=network[network.size()-1];
-    for(std::size_t j=1;j<outputLayer.size();j++) {
-        auto& neuron = outputLayer[j];
-        slopes[network.size()-1][j]=correctionFunction->operator()( expectation[j-1], neuron.output())*
-                neuron.getActivationFunction().derivatedOutput(neuron.value(),neuron.output());
-    }
-
-    for(int layerIndex=static_cast<int>(network.size()-2);layerIndex>0;layerIndex--) {
-        auto &layer=network[layerIndex];
-
-        for(std::size_t j=1;j<layer.size();j++) {
-            float deltasWeight = 0;
-
-            for(std::size_t k=1;k<network[layerIndex+1].size();k++) {
-                deltasWeight+=slopes[layerIndex+1][k]* network[layerIndex+1][k].weight(j);
-            }
-
-            slopes[layerIndex][j]=deltasWeight*layer[j].getActivationFunction().derivatedOutput(layer[j].value(),layer[j].output());
-        }
-    }
-}
-
-void NeuralNetwork::Learning::RProp::computeDeltas(const std::vector<float> &input) {
-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer=network[layerIndex];
-        auto &prevLayer=network[layerIndex-1];
-
-        std::size_t prevLayerSize=prevLayer.size();
-        std::size_t layerSize=layer.size();
-
-        for(std::size_t j=1;j<layerSize;j++) {
-            float update = slopes[layerIndex][j];
-            for(std::size_t k=0;k<prevLayerSize;k++) {
-                float inputValue = 0.0;
-                if(layerIndex==1 && k!=0) {
-                    inputValue = input[k-1];
-                } else {
-                    inputValue= prevLayer[k].output();
-                }
-                if(currentBatchSize == 0) {
-                    gradients[layerIndex][j][k] = update * inputValue;
-                } else {
-                    gradients[layerIndex][j][k] += update * inputValue;
-                }
-            }
-        }
-    }
-}
-
-void NeuralNetwork::Learning::RProp::updateWeights() {

-    for(std::size_t layerIndex=1;layerIndex<network.size();layerIndex++) {
-        auto &layer = network[layerIndex];
-        auto &prevLayer = network[layerIndex - 1];
+    for(std::size_t layerIndex=1;layerIndex<_network.size();layerIndex++) {
+        auto &layer = _network[layerIndex];
+        auto &prevLayer = _network[layerIndex - 1];

        std::size_t prevLayerSize = prevLayer.size();
        std::size_t layerSize = layer.size();

        for(std::size_t j = 1; j < layerSize; j++) {
            for(std::size_t k = 0; k < prevLayerSize; k++) {
-                float gradient = gradients[layerIndex][j][k];
-                float lastGradient = lastGradients[layerIndex][j][k];
+                float gradient = _gradients[layerIndex][j][k];
+                float lastGradient = _lastGradients[layerIndex][j][k];

-                lastGradients[layerIndex][j][k] = gradient;
+                _lastGradients[layerIndex][j][k] = gradient;

-                float weightChangeDelta = lastWeightChanges[layerIndex][j][k];
+                float weightChangeDelta = _lastWeightChanges[layerIndex][j][k];

                if(gradient * lastGradient > 0) {
                    weightChangeDelta = std::min(weightChangeDelta*weightChangePlus,maxChangeOfWeights);
                } else if (gradient * lastGradient < 0) {
                    weightChangeDelta = std::max(weightChangeDelta*weightChangeMinus,minChangeOfWeights);
                } else {
-                    weightChangeDelta = lastWeightChanges[layerIndex][j][k];
+                    weightChangeDelta = _lastWeightChanges[layerIndex][j][k];
                }

-                lastWeightChanges[layerIndex][j][k] = weightChangeDelta;
+                _lastWeightChanges[layerIndex][j][k] = weightChangeDelta;

                if(gradient > 0) {
                    layer[j].weight(k) += weightChangeDelta;
                } else if (gradient < 0){
                    layer[j].weight(k) -= weightChangeDelta;
                } else {
                }
            }
        }
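For reference, the loop above implements the standard Rprop step-size adaptation: with g(t) the accumulated gradient of a weight and \Delta(t) its per-weight step size,

\Delta(t) =
\begin{cases}
\min\!\big(\eta^{+}\,\Delta(t-1),\ \Delta_{\max}\big) & \text{if } g(t)\,g(t-1) > 0,\\
\max\!\big(\eta^{-}\,\Delta(t-1),\ \Delta_{\min}\big) & \text{if } g(t)\,g(t-1) < 0,\\
\Delta(t-1) & \text{otherwise,}
\end{cases}
\qquad
w \leftarrow w + \operatorname{sign}\!\big(g(t)\big)\,\Delta(t),

where \eta^{+} = weightChangePlus, \eta^{-} = weightChangeMinus, and \Delta_{\max}/\Delta_{\min} = maxChangeOfWeights/minChangeOfWeights. The added sign (rather than the usual subtraction) matches the sign convention of the slopes computed in BatchPropagation::computeSlopes.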