From d9614b5796da09247398f21affe38eeb27f1709e Mon Sep 17 00:00:00 2001 From: Shin Date: Tue, 23 Feb 2016 14:08:46 +0100 Subject: [PATCH] AVX fix for non-FMA CPU --- CMakeLists.txt | 1 - cmake/CPUFeatures | 14 ++++++++++++++ src/NeuralNetwork/BasisFunction/Linear.cpp | 13 +++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ca0f94..1871444 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,6 @@ if(CPU_SSE3_AVAILABLE) endif(CPU_SSE3_AVAILABLE) - OPTION(USE_AVX "If AVX instruction set should be used." OFF) OPTION(USE_SSE "If SSE instruction set should be used." OFF) OPTION(USE_SSE2 "If SSE 2 instruction set should be used." ON) diff --git a/cmake/CPUFeatures b/cmake/CPUFeatures index 2172282..8b2334e 100644 --- a/cmake/CPUFeatures +++ b/cmake/CPUFeatures @@ -10,6 +10,7 @@ macro(CHECK_CPU) MESSAGE("-- -- Found SSE2") set(CPU_SSE2_AVAILABLE "true" INTERNAL BOOL "SSE2 available on host") ELSE () + MESSAGE("-- -- Not found SSE2") ENDIF (SSE2_TRUE) STRING(REGEX REPLACE "^.*(sse3).*$" "\\1" SSE_THERE ${CPUINFO}) @@ -18,6 +19,7 @@ macro(CHECK_CPU) MESSAGE("-- -- Found SSE3") set(CPU_SSE3_AVAILABLE "true" INTERNAL BOOL "SSE3 available on host") ELSE () + MESSAGE("-- -- Not found SSE3") ENDIF (SSE3_TRUE) STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO}) @@ -26,6 +28,7 @@ macro(CHECK_CPU) MESSAGE("-- -- Found SSE4.2") set(CPU_SSE4.2_AVAILABLE "true" INTERNAL BOOL "SSE4.2 available on host") ELSE () + MESSAGE("-- -- Not found SSE4.2") ENDIF (SSE4.2_TRUE) STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO}) @@ -34,7 +37,18 @@ macro(CHECK_CPU) MESSAGE("-- -- Found AVX") set(CPU_AVX_AVAILABLE "TRUE" INTERNAL BOOL "AVX available on host") ELSE () + MESSAGE("-- -- Not found AVX") ENDIF (AVX_TRUE) + + STRING(REGEX REPLACE "^.*(fma).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "fma" "${SSE_THERE}" FMA_TRUE) + IF (FMA_TRUE) + MESSAGE("-- -- Found FMA") + set(CPU_FMA_AVAILABLE "TRUE" INTERNAL BOOL "FMA available on host") + ELSE () + MESSAGE("-- -- Not found FMA") + ENDIF (FMA_TRUE) + else() MESSAGE("Error detecting CPU features") endif(CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git a/src/NeuralNetwork/BasisFunction/Linear.cpp b/src/NeuralNetwork/BasisFunction/Linear.cpp index c676375..5d74def 100644 --- a/src/NeuralNetwork/BasisFunction/Linear.cpp +++ b/src/NeuralNetwork/BasisFunction/Linear.cpp @@ -18,13 +18,26 @@ float NeuralNetwork::BasisFunction::Linear::operator()(const std::vector partialSolution.avx=_mm256_setzero_ps(); +#ifndef USE_FMA + __m256 tmp; +#endif for(size_t k=0;k