added performance to readme and modified instructions
This commit is contained in:
@@ -15,9 +15,10 @@ Performance
|
|||||||
i5-5300U & 8GB ram
|
i5-5300U & 8GB ram
|
||||||
|
|
||||||
| date | feedforward_perf | recurrent_perf | backpropagation_perf |
|
| date | feedforward_perf | recurrent_perf | backpropagation_perf |
|
||||||
-------------------- | ---------------- | -------------- | -------------------- |
|
------------------------ | ---------------- | -------------- | -------------------- |
|
||||||
| FANN | 12.6 | | |
|
| FANN | 12.6 | | |
|
||||||
-------------------- | ---------------- | -------------- | -------------------- |
|
------------------------ | ---------------- | -------------- | -------------------- |
|
||||||
| 2016/02/07 initial | 8.27 sec | 7.15 sec | 6.00 sec |
|
| 2016/02/07 initial | 8.27 sec | 7.15 sec | 6.00 sec |
|
||||||
| 2016/02/17 AVX | 5.53 sec | 4.68 sec | 4.63 sec |
|
| 2016/02/17 AVX | 5.53 sec | 4.68 sec | 4.63 sec |
|
||||||
| 2016/02/17 weights | 5.53 sec | 4.68 sec | 3.02 sec |
|
| 2016/02/17 weights | 5.53 sec | 4.68 sec | 3.02 sec |
|
||||||
|
| 2016/02/18 neuron ref. | 5.53 sec | 4.68 sec | 1.02 sec |
|
||||||
@@ -19,12 +19,12 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
|
|||||||
partialSolution.avx=_mm256_setzero_ps();
|
partialSolution.avx=_mm256_setzero_ps();
|
||||||
|
|
||||||
for(size_t k=0;k<alignedPrev;k+=8) {
|
for(size_t k=0;k<alignedPrev;k+=8) {
|
||||||
//TODO: asignement!! -- possible speedup
|
//TODO: assignment!! -- possible speedup
|
||||||
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k)));
|
partialSolution.avx=_mm256_fmadd_ps(_mm256_loadu_ps(weightsData+k),_mm256_loadu_ps(inputData+k),partialSolution.avx);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(size_t k=alignedPrev;k<inputSize;k++) {
|
for(size_t k=alignedPrev;k<inputSize;k++) {
|
||||||
partialSolution.avx=_mm256_add_ps(partialSolution.avx,_mm256_mul_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0)));
|
partialSolution.avx=_mm256_fmadd_ps(_mm256_set_ps(weightsData[k],0,0,0,0,0,0,0),_mm256_set_ps(inputData[k],0,0,0,0,0,0,0),partialSolution.avx);
|
||||||
}
|
}
|
||||||
|
|
||||||
partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
|
partialSolution.avx = _mm256_add_ps(partialSolution.avx, _mm256_permute2f128_ps(partialSolution.avx , partialSolution.avx , 1));
|
||||||
@@ -32,6 +32,7 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector<float>
|
|||||||
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
|
partialSolution.avx = _mm256_hadd_ps(partialSolution.avx, partialSolution.avx);
|
||||||
|
|
||||||
return partialSolution.f[0];
|
return partialSolution.f[0];
|
||||||
|
|
||||||
#elif USE_SSE
|
#elif USE_SSE
|
||||||
|
|
||||||
std::size_t alignedPrev=inputSize-inputSize%4;
|
std::size_t alignedPrev=inputSize-inputSize%4;
|
||||||
|
|||||||
Reference in New Issue
Block a user