Moved Linear and stringification to .cpp files and fixed an error in neuron weights
This commit is contained in:
@@ -10,13 +10,13 @@ OPTION(ENABLE_TESTS "enables tests" ON)
|
|||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Weffc++ -Wshadow -Wstrict-aliasing -ansi -Woverloaded-virtual -Wdelete-non-virtual-dtor -Wno-unused-function")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Weffc++ -Wshadow -Wstrict-aliasing -ansi -Woverloaded-virtual -Wdelete-non-virtual-dtor -Wno-unused-function")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
|
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
|
||||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
|
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native -O3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native -O3")
|
||||||
|
|
||||||
if(USE_AVX)
|
if(USE_AVX)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_AVX")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -DUSE_AVX")
|
||||||
endif(USE_AVX)
|
endif(USE_AVX)
|
||||||
|
|
||||||
if(USE_SSE)
|
if(USE_SSE)
|
||||||
@@ -49,6 +49,7 @@ set (LIBRARY_SOURCES
|
|||||||
include/NeuralNetwork/Stringifiable.h
|
include/NeuralNetwork/Stringifiable.h
|
||||||
src/NeuralNetwork/Learning/BackPropagation.cpp include/NeuralNetwork/Learning/BackPropagation.h
|
src/NeuralNetwork/Learning/BackPropagation.cpp include/NeuralNetwork/Learning/BackPropagation.h
|
||||||
include/sse_mathfun.h
|
include/sse_mathfun.h
|
||||||
|
src/NeuralNetwork/BasisFunction/Linear.cpp
|
||||||
src/NeuralNetwork/FeedForward/Layer.cpp
|
src/NeuralNetwork/FeedForward/Layer.cpp
|
||||||
src/NeuralNetwork/FeedForward/Network.cpp
|
src/NeuralNetwork/FeedForward/Network.cpp
|
||||||
src/NeuralNetwork/Recurrent/Network.cpp
|
src/NeuralNetwork/Recurrent/Network.cpp
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
#include "./StreamingBasisFunction.h"
|
#include "./StreamingBasisFunction.h"
|
||||||
|
|
||||||
#include "../../sse_mathfun.h"
|
#include "../../sse_mathfun.h"
|
||||||
@@ -18,74 +20,7 @@ namespace BasisFunction {
|
|||||||
public:
|
public:
|
||||||
Linear() {}
|
Linear() {}
|
||||||
|
|
||||||
inline virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const override {
|
virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const override;
|
||||||
#ifdef USE_AVX
|
|
||||||
//TODO: check sizes!!!
|
|
||||||
std::size_t inputSize=input.size();
|
|
||||||
size_t alignedPrev=inputSize-inputSize%8;
|
|
||||||
|
|
||||||
const float* weightsData=weights.data();
|
|
||||||
const float* inputData=input.data();
|
|
||||||
|
|
||||||
union {
|
|
||||||
__m256 avx;
|
|
||||||
float f[8];
|
|
||||||
} partialSolution;
|
|
||||||
|
|
||||||
partialSolution.avx=_mm256_setzero_ps();
|
|
||||||
|
|
||||||
for(size_t k=0;k<alignedPrev;k+=8) {
|
|
||||||
//TODO: asignement!! -- possible speedup
|
|
||||||
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k)));
|
|
||||||
}
|
|
||||||
|
|
||||||
for(size_t k=alignedPrev;k<inputSize;k++) {
|
|
||||||
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0)));
|
|
||||||
}
|
|
||||||
|
|
||||||
partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
|
|
||||||
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
|
|
||||||
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
|
|
||||||
|
|
||||||
return partialSolution.f[0];
|
|
||||||
#else
|
|
||||||
#ifdef USE_SSE
|
|
||||||
size_t inputSize=input.size();
|
|
||||||
size_t alignedPrev=inputSize-inputSize%4;
|
|
||||||
|
|
||||||
const float* weightsData=weights.data();
|
|
||||||
const float* inputData=input.data();
|
|
||||||
vec4f partialSolution;
|
|
||||||
partialSolution.sse =_mm_setzero_ps();
|
|
||||||
|
|
||||||
//TODO prefetch ??
|
|
||||||
for(register size_t k=0;k<alignedPrev;k+=4) {
|
|
||||||
partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ps(weightsData+k),_mm_load_ps(inputData+k)));
|
|
||||||
}
|
|
||||||
|
|
||||||
for(register size_t k=alignedPrev;k<inputSize;k++) {
|
|
||||||
partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ss(weightsData+k),_mm_load_ss(inputData+k)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef USE_SSE2 //pre-SSE3 solution
|
|
||||||
partialSolution.sse= _mm_add_ps(_mm_movehl_ps(partialSolution.sse, partialSolution.sse), partialSolution.sse);
|
|
||||||
partialSolution.sse=_mm_add_ss(partialSolution.sse, _mm_shuffle_ps(partialSolution.sse,partialSolution.sse, 1));
|
|
||||||
#else
|
|
||||||
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
|
||||||
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
|
||||||
#endif
|
|
||||||
return partialSolution.f[0];
|
|
||||||
#else
|
|
||||||
|
|
||||||
register float tmp = 0;
|
|
||||||
size_t inputSize=input.size();
|
|
||||||
for(size_t k=0;k<inputSize;k++) {
|
|
||||||
tmp+=input[k]*weights[k];
|
|
||||||
}
|
|
||||||
return tmp;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual BasisFunction* clone() const override {
|
virtual BasisFunction* clone() const override {
|
||||||
return new Linear();
|
return new Linear();
|
||||||
|
|||||||
@@ -71,20 +71,8 @@ namespace FeedForward {
|
|||||||
}
|
}
|
||||||
|
|
||||||
using Stringifiable::stringify;
|
using Stringifiable::stringify;
|
||||||
virtual void stringify(std::ostream& out) const override {
|
virtual void stringify(std::ostream& out) const override;
|
||||||
out << "{" << std::endl;
|
|
||||||
out << "\t \"class\": \"NeuralNetwork::FeedForward::Layer\"," << std::endl;
|
|
||||||
out << "\t \"neurons\": [" << std::endl;
|
|
||||||
bool first=true;
|
|
||||||
for(auto &neuron: neurons) {
|
|
||||||
if(!first)
|
|
||||||
out << ", ";
|
|
||||||
out << neuron->stringify();
|
|
||||||
first=false;
|
|
||||||
}
|
|
||||||
out << "]";
|
|
||||||
out << "}";
|
|
||||||
}
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<NeuronInterface*> neurons;
|
std::vector<NeuronInterface*> neurons;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ namespace NeuralNetwork
|
|||||||
Neuron(unsigned long _id=0, const ActivationFunction::ActivationFunction &activationFunction=ActivationFunction::Sigmoid(-4.9)):
|
Neuron(unsigned long _id=0, const ActivationFunction::ActivationFunction &activationFunction=ActivationFunction::Sigmoid(-4.9)):
|
||||||
NeuronInterface(), basis(new BasisFunction::Linear),
|
NeuronInterface(), basis(new BasisFunction::Linear),
|
||||||
activation(activationFunction.clone()),
|
activation(activationFunction.clone()),
|
||||||
id_(_id),weights(_id+1),_output(0),_value(0) {
|
id_(_id),weights(1),_output(0),_value(0) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Neuron(const Neuron &r): NeuronInterface(), basis(r.basis->clone()), activation(r.activation->clone()),id_(r.id_),
|
Neuron(const Neuron &r): NeuronInterface(), basis(r.basis->clone()), activation(r.activation->clone()),id_(r.id_),
|
||||||
@@ -151,8 +151,8 @@ namespace NeuralNetwork
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual void setInputSize(const std::size_t &size) override {
|
virtual void setInputSize(const std::size_t &size) override {
|
||||||
if(weights.size()<size+1) {
|
if(weights.size()<size) {
|
||||||
weights.resize(size+1);
|
weights.resize(size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
69
src/NeuralNetwork/BasisFunction/Linear.cpp
Normal file
69
src/NeuralNetwork/BasisFunction/Linear.cpp
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
#include <NeuralNetwork/BasisFunction/Linear.h>
|
||||||
|
|
||||||
|
/**
 * Linear basis function: returns the dot product of @p weights and @p input.
 *
 * Three implementations are selected at compile time:
 *  - USE_AVX : 8 floats per step using 256-bit registers,
 *  - USE_SSE : 4 floats per step using 128-bit registers
 *              (USE_SSE2 picks a reduction that avoids the SSE3 hadd instruction),
 *  - neither : plain scalar loop.
 *
 * @param weights weights of the neuron; must have the same size as @p input
 * @param input   input values
 * @return sum over k of weights[k]*input[k]
 *
 * The size precondition is only checked by assert(), so it is not enforced
 * when NDEBUG is defined.
 */
float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float> &weights, const std::vector<float> &input) const {
	// Both vectors are walked over the same index range below.
	assert(input.size()== weights.size());
	const std::size_t inputSize=input.size();

#ifdef USE_AVX

	// Largest multiple of 8 not exceeding inputSize; the rest is handled element-wise.
	const std::size_t alignedPrev=inputSize-inputSize%8;

	const float* weightsData=weights.data();
	const float* inputData=input.data();

	// The union gives access to the individual lanes of the accumulator.
	union {
		__m256 avx;
		float f[8];
	} partialSolution;

	partialSolution.avx=_mm256_setzero_ps();

	for(std::size_t k=0;k<alignedPrev;k+=8) {
		//TODO: alignment!! -- possible speedup (unaligned loads are used for now)
		partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k)));
	}

	// Tail: each leftover product is placed in a single lane (all other lanes are zero).
	// Which lane does not matter, because the reduction below sums all eight lanes.
	for(std::size_t k=alignedPrev;k<inputSize;k++) {
		partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0)));
	}

	// Horizontal sum: add the swapped 128-bit halves, then two pairwise adds
	// leave the total in lane 0.
	partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
	partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
	partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);

	return partialSolution.f[0];

#elif defined(USE_SSE)

	// Largest multiple of 4 not exceeding inputSize; the rest is handled element-wise.
	const std::size_t alignedPrev=inputSize-inputSize%4;

	const float* weightsData=weights.data();
	const float* inputData=input.data();
	vec4f partialSolution;
	partialSolution.sse =_mm_setzero_ps();

	//TODO prefetch ??
	// Unaligned loads: the vectors' storage is not guaranteed to be 16-byte aligned,
	// and an aligned load on a misaligned address faults.
	for(std::size_t k=0;k<alignedPrev;k+=4) {
		partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_loadu_ps(weightsData+k),_mm_loadu_ps(inputData+k)));
	}

	// Tail: _mm_load_ss fills lane 0 and zeroes the other lanes.
	for(std::size_t k=alignedPrev;k<inputSize;k++) {
		partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ss(weightsData+k),_mm_load_ss(inputData+k)));
	}

#ifdef USE_SSE2 //pre-SSE3 solution
	// Add the upper pair onto the lower pair, then lane 1 onto lane 0.
	partialSolution.sse= _mm_add_ps(_mm_movehl_ps(partialSolution.sse, partialSolution.sse), partialSolution.sse);
	partialSolution.sse=_mm_add_ss(partialSolution.sse, _mm_shuffle_ps(partialSolution.sse,partialSolution.sse, 1));
#else
	// Two pairwise horizontal adds leave the total in lane 0.
	partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
	partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
#endif
	return partialSolution.f[0];

#else

	// Portable scalar fallback.
	float tmp = 0;
	for(std::size_t k=0;k<inputSize;k++) {
		tmp+=input[k]*weights[k];
	}
	return tmp;

#endif
}
|
||||||
@@ -7,4 +7,19 @@ void NeuralNetwork::FeedForward::Layer::solve(const std::vector<float> &input, s
|
|||||||
output[neuron->id()]=neuron->operator()(input);
|
output[neuron->id()]=neuron->operator()(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NeuralNetwork::FeedForward::Layer::stringify(std::ostream &out) const {
|
||||||
|
out << "{" << std::endl;
|
||||||
|
out << "\t \"class\": \"NeuralNetwork::FeedForward::Layer\"," << std::endl;
|
||||||
|
out << "\t \"neurons\": [" << std::endl;
|
||||||
|
bool first=true;
|
||||||
|
for(auto &neuron: neurons) {
|
||||||
|
if(!first)
|
||||||
|
out << ", ";
|
||||||
|
out << neuron->stringify();
|
||||||
|
first=false;
|
||||||
|
}
|
||||||
|
out << "]";
|
||||||
|
out << "}";
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user