modified SSE code building
This commit is contained in:
@@ -18,7 +18,7 @@ namespace ActivationFunction {
|
|||||||
* @brief Returns the derivative of the output; it is slower than the version that takes the output, as it needs to compute the output first
|
* @brief Returns the derivative of the output; it is slower than the version that takes the output, as it needs to compute the output first
|
||||||
* @param input is input of function
|
* @param input is input of function
|
||||||
*/
|
*/
|
||||||
inline float derivatedOutput(const float &input) {
|
inline float derivatedOutput(const float &input) const {
|
||||||
return derivatedOutput(input,operator()(input));
|
return derivatedOutput(input,operator()(input));
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -28,13 +28,13 @@ namespace ActivationFunction {
|
|||||||
* @param output is output of function
|
* @param output is output of function
|
||||||
* @see derivatedOutput
|
* @see derivatedOutput
|
||||||
*/
|
*/
|
||||||
virtual float derivatedOutput(const float &input, const float &output) =0;
|
virtual float derivatedOutput(const float &input, const float &output) const=0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Returns value of output
|
* @brief Returns value of output
|
||||||
* @param x is input of function
|
* @param x is input of function
|
||||||
*/
|
*/
|
||||||
virtual float operator()(const float &x)=0;
|
virtual float operator()(const float &x) const=0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Function returns clone of object
|
* @brief Function returns clone of object
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ namespace ActivationFunction {
|
|||||||
public:
|
public:
|
||||||
Heaviside(const float &lambdaP=1.0): lambda(lambdaP) {}
|
Heaviside(const float &lambdaP=1.0): lambda(lambdaP) {}
|
||||||
|
|
||||||
inline virtual float derivatedOutput(const float &,const float &) override { return 1.0; }
|
inline virtual float derivatedOutput(const float &,const float &) const override { return 1.0; }
|
||||||
inline virtual float operator()(const float &x) override { return x>lambda ? 1.0f : 0.0f; };
|
inline virtual float operator()(const float &x) const override { return x>lambda ? 1.0f : 0.0f; };
|
||||||
|
|
||||||
virtual ActivationFunction* clone() const override {
|
virtual ActivationFunction* clone() const override {
|
||||||
return new Heaviside(lambda);
|
return new Heaviside(lambda);
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ namespace ActivationFunction {
|
|||||||
public:
|
public:
|
||||||
HyperbolicTangent(const float& lam=1):lambda(lam) {}
|
HyperbolicTangent(const float& lam=1):lambda(lam) {}
|
||||||
|
|
||||||
inline virtual float derivatedOutput(const float &,const float &output) override { return lambda*(1-output*output); }
|
inline virtual float derivatedOutput(const float &,const float &output) const override { return lambda*(1-output*output); }
|
||||||
|
|
||||||
inline virtual float operator()(const float &x) override { return tanh(lambda*x); };
|
inline virtual float operator()(const float &x) const override { return tanh(lambda*x); };
|
||||||
virtual ActivationFunction* clone() const override {
|
virtual ActivationFunction* clone() const override {
|
||||||
return new HyperbolicTangent(lambda);
|
return new HyperbolicTangent(lambda);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,9 +9,9 @@ namespace ActivationFunction {
|
|||||||
public:
|
public:
|
||||||
Linear(const float &lambdaP=1.0): lambda(lambdaP) {}
|
Linear(const float &lambdaP=1.0): lambda(lambdaP) {}
|
||||||
|
|
||||||
inline virtual float derivatedOutput(const float &,const float &) override { return lambda; }
|
inline virtual float derivatedOutput(const float &,const float &) const override { return lambda; }
|
||||||
|
|
||||||
inline virtual float operator()(const float &x) override { return x*lambda; };
|
inline virtual float operator()(const float &x) const override { return x*lambda; };
|
||||||
|
|
||||||
virtual ActivationFunction* clone() const override {
|
virtual ActivationFunction* clone() const override {
|
||||||
return new Linear(lambda);
|
return new Linear(lambda);
|
||||||
|
|||||||
@@ -16,10 +16,11 @@ namespace ActivationFunction {
|
|||||||
public:
|
public:
|
||||||
Sigmoid(const float lambdaP = -0.5): lambda(lambdaP) {}
|
Sigmoid(const float lambdaP = -0.5): lambda(lambdaP) {}
|
||||||
|
|
||||||
inline virtual float derivatedOutput(const float &, const float &output) override { return -lambda*output*(1.0f-output); }
|
inline virtual float derivatedOutput(const float &, const float &output) const override { return -lambda*output*(1.0f-output); }
|
||||||
|
|
||||||
inline virtual float operator()(const float &x) override { return 1.0f / (1.0f +exp(lambda*x) ); };
|
inline virtual float operator()(const float &x) const override { return 1.0f / (1.0f +exp(lambda*x) ); };
|
||||||
inline virtual __m128 operator()(const __m128 &x) override {
|
|
||||||
|
inline virtual __m128 operator()(const __m128 &x) const override {
|
||||||
// exp_ps is extremely slow!
|
// exp_ps is extremely slow!
|
||||||
return _mm_div_ps(_mm_set1_ps(1.0),_mm_add_ps(exp_ps(_mm_mul_ps(_mm_set1_ps(lambda),x)),_mm_set1_ps(1.0)));
|
return _mm_div_ps(_mm_set1_ps(1.0),_mm_add_ps(exp_ps(_mm_mul_ps(_mm_set1_ps(lambda),x)),_mm_set1_ps(1.0)));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,13 +14,13 @@ namespace ActivationFunction {
|
|||||||
class StreamingActivationFunction : public ActivationFunction {
|
class StreamingActivationFunction : public ActivationFunction {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
virtual float operator()(const float &x)=0;
|
virtual float operator()(const float &x) const=0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Returns value of four outputs
|
* @brief Returns value of four outputs
|
||||||
* @param x is float[4]; a value can be stored in every element of the array
|
* @param x is float[4]; a value can be stored in every element of the array
|
||||||
*/
|
*/
|
||||||
virtual __m128 operator()(const __m128 &x)=0;
|
virtual __m128 operator()(const __m128 &x) const=0;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -17,7 +17,9 @@ namespace BasisFunction {
|
|||||||
public:
|
public:
|
||||||
Linear() {}
|
Linear() {}
|
||||||
|
|
||||||
inline virtual float computeStreaming(const std::vector<float>& weights, const std::vector<float>& input) const override {
|
inline virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const override {
|
||||||
|
|
||||||
|
#ifdef USE_SSE
|
||||||
size_t inputSize=input.size();
|
size_t inputSize=input.size();
|
||||||
size_t alignedPrev=inputSize-inputSize%4;
|
size_t alignedPrev=inputSize-inputSize%4;
|
||||||
|
|
||||||
@@ -35,24 +37,23 @@ namespace BasisFunction {
|
|||||||
partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ss(weightsData+k),_mm_load_ss(inputData+k)));
|
partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ss(weightsData+k),_mm_load_ss(inputData+k)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_SSE2 //pre-SSE3 solution
|
#ifdef USE_SSE2 //pre-SSE3 solution
|
||||||
partialSolution.sse= _mm_add_ps(_mm_movehl_ps(partialSolution.sse, partialSolution.sse), partialSolution.sse);
|
partialSolution.sse= _mm_add_ps(_mm_movehl_ps(partialSolution.sse, partialSolution.sse), partialSolution.sse);
|
||||||
partialSolution.sse=_mm_add_ss(partialSolution.sse, _mm_shuffle_ps(partialSolution.sse,partialSolution.sse, 1));
|
partialSolution.sse=_mm_add_ss(partialSolution.sse, _mm_shuffle_ps(partialSolution.sse,partialSolution.sse, 1));
|
||||||
#else
|
#else
|
||||||
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
||||||
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return partialSolution.f[0];
|
return partialSolution.f[0];
|
||||||
}
|
#else
|
||||||
|
|
||||||
inline virtual float compute(const std::vector<float>& weights, const std::vector<float>& input) const override {
|
|
||||||
register float tmp = 0;
|
register float tmp = 0;
|
||||||
size_t inputSize=input.size();
|
size_t inputSize=input.size();
|
||||||
for(size_t k=0;k<inputSize;k++) {
|
for(size_t k=0;k<inputSize;k++) {
|
||||||
tmp+=input[k]*weights[k];
|
tmp+=input[k]*weights[k];
|
||||||
}
|
}
|
||||||
return tmp;
|
return tmp;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual BasisFunction* clone() const override {
|
virtual BasisFunction* clone() const override {
|
||||||
|
|||||||
@@ -13,13 +13,7 @@ namespace BasisFunction {
|
|||||||
float f[4];
|
float f[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const override {
|
virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const =0;
|
||||||
return computeStreaming(weights,input);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual float computeStreaming(const std::vector<float>& weights, const std::vector<float>& input) const =0;
|
|
||||||
|
|
||||||
virtual float compute(const std::vector<float>& weights, const std::vector<float>& input) const =0;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -8,13 +8,11 @@
|
|||||||
int main() {
|
int main() {
|
||||||
{
|
{
|
||||||
NeuralNetwork::BasisFunction::Linear l;
|
NeuralNetwork::BasisFunction::Linear l;
|
||||||
assert(39.0==l.compute({1,2,3,5},{1,2,3,5}));
|
assert(39.0==l({1,2,3,5},{1,2,3,5}));
|
||||||
assert(39.0==l.computeStreaming({1,2,3,5},{1,2,3,5}));
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
NeuralNetwork::BasisFunction::Linear l;
|
NeuralNetwork::BasisFunction::Linear l;
|
||||||
assert(88.0==l.computeStreaming({1,2,3,5,7},{1,2,3,5,7}));
|
assert(88.0==l({1,2,3,5,7},{1,2,3,5,7}));
|
||||||
assert(88.0==l.compute({1,2,3,5,7},{1,2,3,5,7}));
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
NeuralNetwork::BasisFunction::Linear l;
|
NeuralNetwork::BasisFunction::Linear l;
|
||||||
@@ -22,8 +20,7 @@ int main() {
|
|||||||
for(int in=0;in<100;in++) {
|
for(int in=0;in<100;in++) {
|
||||||
w.push_back(2);
|
w.push_back(2);
|
||||||
}
|
}
|
||||||
assert(400.0==l.computeStreaming(w,w));
|
assert(400.0==l(w,w));
|
||||||
assert(400.0==l.compute(w,w));
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
NeuralNetwork::BasisFunction::Linear l;
|
NeuralNetwork::BasisFunction::Linear l;
|
||||||
@@ -31,8 +28,7 @@ int main() {
|
|||||||
for(int in=0;in<55;in++) {
|
for(int in=0;in<55;in++) {
|
||||||
w.push_back(2);
|
w.push_back(2);
|
||||||
}
|
}
|
||||||
assert(220.0==l.computeStreaming(w,w));
|
assert(220.0==l(w,w));
|
||||||
assert(220.0==l.compute(w,w));
|
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
NeuralNetwork::BasisFunction::Product l;
|
NeuralNetwork::BasisFunction::Product l;
|
||||||
|
|||||||
@@ -3,13 +3,6 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
void printVec(const std::vector<float> &v) {
|
|
||||||
for(int i=0;i<v.size();i++) {
|
|
||||||
std::cout << v[i] << ", ";
|
|
||||||
}
|
|
||||||
std::cout<< "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
{ // XOR problem
|
{ // XOR problem
|
||||||
NeuralNetwork::FeedForward::Network n(2);
|
NeuralNetwork::FeedForward::Network n(2);
|
||||||
|
|||||||
Reference in New Issue
Block a user