Modified how the SSE code is built

This commit is contained in:
2016-02-07 22:54:43 +01:00
parent 5f43fb8cfb
commit bec7a4f3ae
10 changed files with 28 additions and 43 deletions

View File

@@ -17,7 +17,9 @@ namespace BasisFunction {
public:
Linear() {}
inline virtual float computeStreaming(const std::vector<float>& weights, const std::vector<float>& input) const override {
inline virtual float operator()(const std::vector<float>& weights, const std::vector<float>& input) const override {
#ifdef USE_SSE
size_t inputSize=input.size();
size_t alignedPrev=inputSize-inputSize%4;
@@ -35,24 +37,23 @@ namespace BasisFunction {
partialSolution.sse=_mm_add_ps(partialSolution.sse,_mm_mul_ps(_mm_load_ss(weightsData+k),_mm_load_ss(inputData+k)));
}
#ifdef USE_SSE2 //pre-SSE3 solution
#ifdef USE_SSE2 //pre-SSE3 solution
partialSolution.sse= _mm_add_ps(_mm_movehl_ps(partialSolution.sse, partialSolution.sse), partialSolution.sse);
partialSolution.sse=_mm_add_ss(partialSolution.sse, _mm_shuffle_ps(partialSolution.sse,partialSolution.sse, 1));
#else
#else
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
partialSolution.sse = _mm_hadd_ps(partialSolution.sse, partialSolution.sse);
#endif
#endif
return partialSolution.f[0];
}
#else
inline virtual float compute(const std::vector<float>& weights, const std::vector<float>& input) const override {
register float tmp = 0;
size_t inputSize=input.size();
for(size_t k=0;k<inputSize;k++) {
tmp+=input[k]*weights[k];
}
return tmp;
#endif
}
virtual BasisFunction* clone() const override {