meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245

From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001
From: Randy MacLeod <Randy.MacLeod@windriver.com>
Date: Wed, 26 Apr 2017 15:00:32 -0400
Subject: [PATCH 2/2] Revert "check FP16 build condition correctly"

This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13.

Fix up revert conflicts to take previous behaviour.
---
 cmake/OpenCVCompilerOptions.cmake         | 45 +++++++++--------------
 modules/core/include/opencv2/core/cvdef.h |  2 +-
 modules/core/src/convert.cpp              | 11 +++---
 modules/core/test/test_intrin.cpp         | 60 ++++++++++++++-----------------
 4 files changed, 48 insertions(+), 70 deletions(-)

diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 5bb0479..4b19fdb 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     add_extra_compiler_option("-mfp16-format=ieee")
   endif(ARM)
   if(ENABLE_NEON)
-    add_extra_compiler_option("-mfpu=neon")
+    add_extra_compiler_option("-mfpu=neon-fp16")
   endif()
   if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
     add_extra_compiler_option("-mfpu=vfpv3")
@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
   add_extra_compiler_option(-fvisibility-inlines-hidden)
 endif()
 
-if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
-  if(ARM AND ENABLE_NEON)
-    set(FP16_OPTION "-mfpu=neon-fp16")
-  elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
-    set(FP16_OPTION "-mf16c")
-  endif()
-  try_compile(__VALID_FP16
-    "${OpenCV_BINARY_DIR}"
-    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
-    COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
-    OUTPUT_VARIABLE TRY_OUT
-    )
-  if(NOT __VALID_FP16)
-    if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
-      # GCC enables AVX when mf16c is passed
-      message(STATUS "FP16: Feature disabled")
-    else()
-      message(STATUS "FP16: Compiler support is not available")
-    endif()
-  else()
-    message(STATUS "FP16: Compiler support is available")
-    set(HAVE_FP16 1)
-    if(NOT ${FP16_OPTION} STREQUAL "")
-      add_extra_compiler_option(${FP16_OPTION})
-    endif()
-  endif()
-endif()
-
 #combine all "extra" options
 set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
 set(CMAKE_CXX_FLAGS         "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
@@ -450,6 +422,21 @@ if(MSVC)
   endif()
 endif()
 
+if(NOT OPENCV_FP16_DISABLE)
+  try_compile(__VALID_FP16
+    "${OpenCV_BINARY_DIR}"
+    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
+    COMPILE_DEFINITIONS "-DCHECK_FP16"
+    OUTPUT_VARIABLE TRY_OUT
+    )
+  if(NOT __VALID_FP16)
+    message(STATUS "FP16: Compiler support is not available")
+  else()
+    message(STATUS "FP16: Compiler support is available")
+    set(HAVE_FP16 1)
+  endif()
+endif()
+
 if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
   link_directories("/usr/local/lib")
 endif()
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index efc24ca..a10936b 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -312,7 +312,7 @@ enum CpuFeatures {
 typedef union Cv16suf
 {
     short i;
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
     __fp16 h;
 #endif
     struct _fp16Format
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index e04d89e..46db26f 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -44,7 +44,6 @@
 #include "precomp.hpp"
 
 #include "opencl_kernels_core.hpp"
-#include "opencv2/core/hal/intrin.hpp"
 
 #include "opencv2/core/openvx/ovx_defs.hpp"
 
@@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int>
 
 #endif
 
-#if !CV_FP16_TYPE
+#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
 // const numbers for floating points format
 const unsigned int kShiftSignificand    = 13;
 const unsigned int kMaskFp16Significand = 0x3ff;
@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent    = 15;
 const unsigned int kBiasFp32Exponent    = 127;
 #endif
 
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
 static float convertFp16SW(short fp16)
 {
     // Fp16 -> Fp32
@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16)
 }
 #endif
 
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
 static short convertFp16SW(float fp32)
 {
     // Fp32 -> Fp16
@@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
             if ( ( (intptr_t)dst & 0xf ) == 0 )
 #endif
             {
-#if CV_FP16 && CV_SIMD128
+#if CV_FP16
                 for ( ; x <= size.width - 4; x += 4)
                 {
                     v_float32x4 v_src = v_load(src + x);
@@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
             if ( ( (intptr_t)src & 0xf ) == 0 )
 #endif
             {
-#if CV_FP16 && CV_SIMD128
+#if CV_FP16
                 for ( ; x <= size.width - 4; x += 4)
                 {
                     v_float16x4 v_src = v_load_f16(src + x);
diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp
index 66b2083..7349d48 100644
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -729,56 +729,48 @@ template<typename R> struct TheTest
         return *this;
     }
 
+#if CV_FP16
     TheTest & test_loadstore_fp16()
     {
-#if CV_FP16
         AlignedData<R> data;
         AlignedData<R> out;
 
-        if(checkHardwareSupport(CV_CPU_FP16))
-        {
-            // check if addresses are aligned and unaligned respectively
-            EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
-            EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
-            EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
-            EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
-
-            // check some initialization methods
-            R r1 = data.u;
-            R r2 = v_load_f16(data.a.d);
-            R r3(r2);
-            EXPECT_EQ(data.u[0], r1.get0());
-            EXPECT_EQ(data.a[0], r2.get0());
-            EXPECT_EQ(data.a[0], r3.get0());
-
-            // check some store methods
-            out.a.clear();
-            v_store_f16(out.a.d, r1);
-            EXPECT_EQ(data.a, out.a);
-        }
+        // check if addresses are aligned and unaligned respectively
+        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
+        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
+
+        // check some initialization methods
+        R r1 = data.u;
+        R r2 = v_load_f16(data.a.d);
+        R r3(r2);
+        EXPECT_EQ(data.u[0], r1.get0());
+        EXPECT_EQ(data.a[0], r2.get0());
+        EXPECT_EQ(data.a[0], r3.get0());
+
+        // check some store methods
+        out.a.clear();
+        v_store_f16(out.a.d, r1);
+        EXPECT_EQ(data.a, out.a);
 
         return *this;
-#endif
     }
 
     TheTest & test_float_cvt_fp16()
     {
-#if CV_FP16
         AlignedData<v_float32x4> data;
 
-        if(checkHardwareSupport(CV_CPU_FP16))
-        {
-            // check conversion
-            v_float32x4 r1 = v_load(data.a.d);
-            v_float16x4 r2 = v_cvt_f16(r1);
-            v_float32x4 r3 = v_cvt_f32(r2);
-            EXPECT_EQ(0x3c00, r2.get0());
-            EXPECT_EQ(r3.get0(), r1.get0());
-        }
+        // check conversion
+        v_float32x4 r1 = v_load(data.a.d);
+        v_float16x4 r2 = v_cvt_f16(r1);
+        v_float32x4 r3 = v_cvt_f32(r2);
+        EXPECT_EQ(0x3c00, r2.get0());
+        EXPECT_EQ(r3.get0(), r1.get0());
 
         return *this;
-#endif
     }
+#endif
 
 };
 
-- 
2.9.3