added performance to readme and modified instructions

This commit is contained in:
2016-02-18 20:34:57 +01:00
parent 1a55a720eb
commit c45fd63591
2 changed files with 12 additions and 10 deletions

View File

@@ -14,10 +14,11 @@ Performance
i5-5300U & 8GB ram i5-5300U & 8GB ram
| date | feedforward_perf | recurrent_perf | backpropagation_perf | | date | feedforward_perf | recurrent_perf | backpropagation_perf |
-------------------- | ---------------- | -------------- | -------------------- | ------------------------ | ---------------- | -------------- | -------------------- |
| FANN | 12.6 | | | | FANN | 12.6 | | |
-------------------- | ---------------- | -------------- | -------------------- | ------------------------ | ---------------- | -------------- | -------------------- |
| 2016/02/07 initial | 8.27 sec | 7.15 sec | 6.00 sec | | 2016/02/07 initial | 8.27 sec | 7.15 sec | 6.00 sec |
| 2016/02/17 AVX | 5.53 sec | 4.68 sec | 4.63 sec | | 2016/02/17 AVX | 5.53 sec | 4.68 sec | 4.63 sec |
| 2016/02/17 weights | 5.53 sec | 4.68 sec | 3.02 sec | | 2016/02/17 weights | 5.53 sec | 4.68 sec | 3.02 sec |
| 2016/02/18 neuron ref. | 5.53 sec | 4.68 sec | 1.02 sec |

View File

@@ -19,12 +19,12 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
partialSolution.avx=_mm256_setzero_ps(); partialSolution.avx=_mm256_setzero_ps();
for(size_t k=0;k<alignedPrev;k+=8) { for(size_t k=0;k<alignedPrev;k+=8) {
//TODO: asignement!! -- possible speedup //TODO: assignment!! -- possible speedup
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k))); partialSolution.avx=_mm256_fmadd_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k),partialSolution.avx);
} }
for(size_t k=alignedPrev;k<inputSize;k++) { for(size_t k=alignedPrev;k<inputSize;k++) {
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0))); partialSolution.avx=_mm256_fmadd_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0),partialSolution.avx);
} }
partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1)); partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
@@ -32,6 +32,7 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx); partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
return partialSolution.f[0]; return partialSolution.f[0];
#elif USE_SSE #elif USE_SSE
std::size_t alignedPrev=inputSize-inputSize%4; std::size_t alignedPrev=inputSize-inputSize%4;