From 51fbd3cf7db14399bc1c69c4a5fd63f157fd209d Mon Sep 17 00:00:00 2001 From: Randy MacLeod Date: Wed, 26 Apr 2017 21:33:29 -0400 Subject: opencv: disable broken Intel FP16 detection With opencv-3.2, the configuration detects if the target supports half-precision floating-point format. This fails to compile for some Intel targets such as skylake with an error such as: error: '_mm_cvtph_ps' was not declared in this scope The configuration worked in opencv-3.1 so revert two commits to drop the FP16 detection even though it may make opencv slower. The only change in the configure log is: Signed-off-by: Martin Jansa --- .../0001-Revert-cuda-fix-fp16-compilation.patch | 27 +++ ...vert-check-FP16-build-condition-correctly.patch | 245 +++++++++++++++++++++ meta-oe/recipes-support/opencv/opencv_3.2.bb | 2 + 3 files changed, 274 insertions(+) create mode 100644 meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch create mode 100644 meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch diff --git a/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch new file mode 100644 index 0000000000..507d7968a0 --- /dev/null +++ b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch @@ -0,0 +1,27 @@ +From 69f9707678190f6a0948a547dce948251f972676 Mon Sep 17 00:00:00 2001 +From: Randy MacLeod +Date: Wed, 26 Apr 2017 14:57:30 -0400 +Subject: [PATCH 1/2] Revert "cuda: fix fp16 compilation" + +This reverts commit 12e00827be40576b686ea4438a6e6ef85208743d. +--- + modules/core/include/opencv2/core/cvdef.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h +index 699b166..efc24ca 100644 +--- a/modules/core/include/opencv2/core/cvdef.h ++++ b/modules/core/include/opencv2/core/cvdef.h +@@ -303,8 +303,7 @@ enum CpuFeatures { + #define CV_2PI 6.283185307179586476925286766559 + #define CV_LOG2 0.69314718055994530941723212145818 + +-#if defined __ARM_FP16_FORMAT_IEEE \ +- && !defined __CUDACC__ ++#if defined (__ARM_FP16_FORMAT_IEEE) + # define CV_FP16_TYPE 1 + #else + # define CV_FP16_TYPE 0 +-- +2.9.3 + diff --git a/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch new file mode 100644 index 0000000000..d1950a9361 --- /dev/null +++ b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch @@ -0,0 +1,245 @@ +From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001 +From: Randy MacLeod +Date: Wed, 26 Apr 2017 15:00:32 -0400 +Subject: [PATCH 2/2] Revert "check FP16 build condition correctly" + +This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13. + +Fix up revert conflicts to take previous behaviour. +--- + cmake/OpenCVCompilerOptions.cmake | 45 +++++++++-------------- + modules/core/include/opencv2/core/cvdef.h | 2 +- + modules/core/src/convert.cpp | 11 +++--- + modules/core/test/test_intrin.cpp | 60 ++++++++++++++----------------- + 4 files changed, 48 insertions(+), 70 deletions(-) + +diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake +index 5bb0479..4b19fdb 100644 +--- a/cmake/OpenCVCompilerOptions.cmake ++++ b/cmake/OpenCVCompilerOptions.cmake +@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) + add_extra_compiler_option("-mfp16-format=ieee") + endif(ARM) + if(ENABLE_NEON) +- add_extra_compiler_option("-mfpu=neon") ++ add_extra_compiler_option("-mfpu=neon-fp16") + endif() + if(ENABLE_VFPV3 AND NOT ENABLE_NEON) + add_extra_compiler_option("-mfpu=vfpv3") +@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) + add_extra_compiler_option(-fvisibility-inlines-hidden) + endif() + +-if(NOT OPENCV_FP16_DISABLE AND NOT IOS) +- if(ARM AND ENABLE_NEON) +- set(FP16_OPTION "-mfpu=neon-fp16") +- elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) +- set(FP16_OPTION "-mf16c") +- endif() +- try_compile(__VALID_FP16 +- "${OpenCV_BINARY_DIR}" +- "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" +- COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" +- OUTPUT_VARIABLE TRY_OUT +- ) +- if(NOT __VALID_FP16) +- if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) +- # GCC enables AVX when mf16c is passed +- message(STATUS "FP16: Feature disabled") +- else() +- message(STATUS "FP16: Compiler support is not available") +- endif() +- else() +- message(STATUS "FP16: Compiler support is available") +- set(HAVE_FP16 1) +- if(NOT ${FP16_OPTION} STREQUAL "") +- add_extra_compiler_option(${FP16_OPTION}) +- endif() +- endif() +-endif() +- + #combine all "extra" options + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") +@@ -450,6 +422,21 @@ if(MSVC) + endif() + endif() + ++if(NOT OPENCV_FP16_DISABLE) ++ try_compile(__VALID_FP16 ++ "${OpenCV_BINARY_DIR}" ++ "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" ++ COMPILE_DEFINITIONS "-DCHECK_FP16" ++ OUTPUT_VARIABLE TRY_OUT ++ ) ++ if(NOT __VALID_FP16) ++ message(STATUS "FP16: Compiler support is not available") ++ else() ++ message(STATUS "FP16: Compiler support is available") ++ set(HAVE_FP16 1) ++ endif() ++endif() ++ + if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") + link_directories("/usr/local/lib") + endif() +diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h +index efc24ca..a10936b 100644 +--- a/modules/core/include/opencv2/core/cvdef.h ++++ b/modules/core/include/opencv2/core/cvdef.h +@@ -312,7 +312,7 @@ enum CpuFeatures { + typedef union Cv16suf + { + short i; +-#if CV_FP16_TYPE ++#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) + __fp16 h; + #endif + struct _fp16Format +diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp +index e04d89e..46db26f 100644 +--- a/modules/core/src/convert.cpp ++++ b/modules/core/src/convert.cpp +@@ -44,7 +44,6 @@ + #include "precomp.hpp" + + #include "opencl_kernels_core.hpp" +-#include "opencv2/core/hal/intrin.hpp" + + #include "opencv2/core/openvx/ovx_defs.hpp" + +@@ -4382,7 +4381,7 @@ struct Cvt_SIMD + + #endif + +-#if !CV_FP16_TYPE ++#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) + // const numbers for floating points format + const unsigned int kShiftSignificand = 13; + const unsigned int kMaskFp16Significand = 0x3ff; +@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent = 15; + const unsigned int kBiasFp32Exponent = 127; + #endif + +-#if CV_FP16_TYPE ++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) + static float convertFp16SW(short fp16) + { + // Fp16 -> Fp32 +@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16) + } + #endif + +-#if CV_FP16_TYPE ++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) + static short convertFp16SW(float fp32) + { + // Fp32 -> Fp16 +@@ -4560,7 +4559,7 @@ cvtScaleHalf_( const float* src, size_t sstep, short* dst, size_t + if ( ( (intptr_t)dst & 0xf ) == 0 ) + #endif + { +-#if CV_FP16 && CV_SIMD128 ++#if CV_FP16 + for ( ; x <= size.width - 4; x += 4) + { + v_float32x4 v_src = v_load(src + x); +@@ -4606,7 +4605,7 @@ cvtScaleHalf_( const short* src, size_t sstep, float* dst, size_t + if ( ( (intptr_t)src & 0xf ) == 0 ) + #endif + { +-#if CV_FP16 && CV_SIMD128 ++#if CV_FP16 + for ( ; x <= size.width - 4; x += 4) + { + v_float16x4 v_src = v_load_f16(src + x); +diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp +index 66b2083..7349d48 100644 +--- a/modules/core/test/test_intrin.cpp ++++ b/modules/core/test/test_intrin.cpp +@@ -729,56 +729,48 @@ template struct TheTest + return *this; + } + ++#if CV_FP16 + TheTest & test_loadstore_fp16() + { +-#if CV_FP16 + AlignedData data; + AlignedData out; + +- if(checkHardwareSupport(CV_CPU_FP16)) +- { +- // check if addresses are aligned and unaligned respectively +- EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); +- EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); +- EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); +- EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); +- +- // check some initialization methods +- R r1 = data.u; +- R r2 = v_load_f16(data.a.d); +- R r3(r2); +- EXPECT_EQ(data.u[0], r1.get0()); +- EXPECT_EQ(data.a[0], r2.get0()); +- EXPECT_EQ(data.a[0], r3.get0()); +- +- // check some store methods +- out.a.clear(); +- v_store_f16(out.a.d, r1); +- EXPECT_EQ(data.a, out.a); +- } ++ // check if addresses are aligned and unaligned respectively ++ EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); ++ EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); ++ EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); ++ EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); ++ ++ // check some initialization methods ++ R r1 = data.u; ++ R r2 = v_load_f16(data.a.d); ++ R r3(r2); ++ EXPECT_EQ(data.u[0], r1.get0()); ++ EXPECT_EQ(data.a[0], r2.get0()); ++ EXPECT_EQ(data.a[0], r3.get0()); ++ ++ // check some store methods ++ out.a.clear(); ++ v_store_f16(out.a.d, r1); ++ EXPECT_EQ(data.a, out.a); + + return *this; +-#endif + } + + TheTest & test_float_cvt_fp16() + { +-#if CV_FP16 + AlignedData data; + +- if(checkHardwareSupport(CV_CPU_FP16)) +- { +- // check conversion +- v_float32x4 r1 = v_load(data.a.d); +- v_float16x4 r2 = v_cvt_f16(r1); +- v_float32x4 r3 = v_cvt_f32(r2); +- EXPECT_EQ(0x3c00, r2.get0()); +- EXPECT_EQ(r3.get0(), r1.get0()); +- } ++ // check conversion ++ v_float32x4 r1 = v_load(data.a.d); ++ v_float16x4 r2 = v_cvt_f16(r1); ++ v_float32x4 r3 = v_cvt_f32(r2); ++ EXPECT_EQ(0x3c00, r2.get0()); ++ EXPECT_EQ(r3.get0(), r1.get0()); + + return *this; +-#endif + } ++#endif + + }; + +-- +2.9.3 + diff --git a/meta-oe/recipes-support/opencv/opencv_3.2.bb b/meta-oe/recipes-support/opencv/opencv_3.2.bb index 2cff212ab0..98b6b06529 100644 --- a/meta-oe/recipes-support/opencv/opencv_3.2.bb +++ b/meta-oe/recipes-support/opencv/opencv_3.2.bb @@ -27,6 +27,8 @@ SRC_URI = "git://github.com/opencv/opencv.git;name=opencv \ file://fixpkgconfig.patch \ file://uselocalxfeatures.patch;patchdir=../contrib/ \ file://useoeprotobuf.patch;patchdir=../contrib/ \ + file://0001-Revert-cuda-fix-fp16-compilation.patch \ + file://0002-Revert-check-FP16-build-condition-correctly.patch \ " PV = "3.2+git${SRCPV}" -- cgit 1.2.3-korg