added performance to readme and modified instructions

This commit is contained in:
2016-02-18 20:34:57 +01:00
parent 1a55a720eb
commit c45fd63591
2 changed files with 12 additions and 10 deletions

View File

@@ -19,12 +19,12 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
partialSolution.avx=_mm256_setzero_ps();
for(size_t k=0;k<alignedPrev;k+=8) {
//TODO: asignement!! -- possible speedup
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k)));
//TODO: assignement!! -- possible speedup
partialSolution.avx=_mm256_fmadd_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k),partialSolution.avx);
}
for(size_t k=alignedPrev;k<inputSize;k++) {
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0)));
partialSolution.avx=_mm256_fmadd_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0),partialSolution.avx);
}
partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
@@ -32,6 +32,7 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
return partialSolution.f[0];
#elif USE_SSE
std::size_t alignedPrev=inputSize-inputSize%4;