mirror of
https://gitcode.com/gh_mirrors/es/esp32-opencv.git
synced 2025-08-14 18:50:49 +08:00
220 lines
6.6 KiB
C++
220 lines
6.6 KiB
C++
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
|
|
|
|
#include "precomp.hpp"
|
|
|
|
namespace cv {
|
|
|
|
// Common signature of the per-depth counting kernels: takes a pointer to the
// element data (passed as const uchar*) and an element count, returns an int count.
typedef int (*CountNonZeroFunc)(const uchar*, int);
|
|
|
|
|
|
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
|
|
|
// Looks up the counting kernel for the given depth index; may return 0 when
// no kernel is registered for that depth.
CountNonZeroFunc getCountNonZeroTab(int depth);
|
|
|
|
|
|
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
|
|
|
// Scalar fallback: counts the elements of src[0..len) that compare unequal to zero.
// T may be any element type comparable with 0. Returns 0 for an empty range.
template<typename T>
static int countNonZero_(const T* src, int len )
{
    int i=0, nz = 0;
    #if CV_ENABLE_UNROLLED
    // Manually unrolled main loop: four elements per iteration.
    for(; i <= len - 4; i += 4 )
        nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
    #endif
    // Remaining elements (or all of them when unrolling is disabled).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
|
|
|
|
// Counts the non-zero bytes in src[0..len).
// With CV_SIMD the vectorizable prefix is processed by counting ZERO elements in a
// three-level accumulator (8-bit -> 16-bit -> 32-bit lanes) and subtracting that
// count from the number of elements processed; the tail is handled by a scalar loop.
static int countNonZero8u( const uchar* src, int len )
{
    int i=0, nz = 0;
#if CV_SIMD
    // Largest multiple of the vector width not exceeding len
    // (the mask trick relies on nlanes being a power of two).
    int len0 = len & -v_uint8::nlanes;
    v_uint8 v_zero = vx_setzero_u8();
    v_uint8 v_one = vx_setall_u8(1);

    v_uint32 v_sum32 = vx_setzero_u32();
    while (i < len0)
    {
        v_uint16 v_sum16 = vx_setzero_u16();
        int j = i;
        // 65280 = 255 * 256: caps the elements folded into v_sum16 per pass so that
        // its 16-bit lanes cannot overflow (assuming v_uint8 has twice the lanes of v_uint16).
        while (j < std::min(len0, i + 65280 * v_uint16::nlanes))
        {
            v_uint8 v_sum8 = vx_setzero_u8();
            int k = j;
            // At most 255 vectors per pass: each lane grows by at most 1 per vector,
            // so the 8-bit lanes stay within range.
            for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes)
                v_sum8 += v_one & (vx_load(src + k) == v_zero);  // +1 in every lane that equals zero
            // Widen the 8-bit partial counts and fold them into the 16-bit accumulator.
            v_uint16 part1, part2;
            v_expand(v_sum8, part1, part2);
            v_sum16 += part1 + part2;
            j = k;
        }
        // Widen the 16-bit partial counts and fold them into the 32-bit accumulator.
        v_uint32 part1, part2;
        v_expand(v_sum16, part1, part2);
        v_sum32 += part1 + part2;
        i = j;
    }
    // i elements were scanned; the accumulator holds how many of them were zero.
    nz = i - v_reduce_sum(v_sum32);
    v_cleanup();
#endif
    // Scalar tail (or the whole array when SIMD is unavailable).
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
|
|
|
|
static int countNonZero16u( const ushort* src, int len )
|
|
{
|
|
int i = 0, nz = 0;
|
|
#if CV_SIMD
|
|
int len0 = len & -v_int8::nlanes;
|
|
v_uint16 v_zero = vx_setzero_u16();
|
|
v_int8 v_one = vx_setall_s8(1);
|
|
|
|
v_int32 v_sum32 = vx_setzero_s32();
|
|
while (i < len0)
|
|
{
|
|
v_int16 v_sum16 = vx_setzero_s16();
|
|
int j = i;
|
|
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
|
{
|
|
v_int8 v_sum8 = vx_setzero_s8();
|
|
int k = j;
|
|
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
|
v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero));
|
|
v_int16 part1, part2;
|
|
v_expand(v_sum8, part1, part2);
|
|
v_sum16 += part1 + part2;
|
|
j = k;
|
|
}
|
|
v_int32 part1, part2;
|
|
v_expand(v_sum16, part1, part2);
|
|
v_sum32 += part1 + part2;
|
|
i = j;
|
|
}
|
|
nz = i - v_reduce_sum(v_sum32);
|
|
v_cleanup();
|
|
#endif
|
|
return nz + countNonZero_(src + i, len - i);
|
|
}
|
|
|
|
static int countNonZero32s( const int* src, int len )
|
|
{
|
|
int i = 0, nz = 0;
|
|
#if CV_SIMD
|
|
int len0 = len & -v_int8::nlanes;
|
|
v_int32 v_zero = vx_setzero_s32();
|
|
v_int8 v_one = vx_setall_s8(1);
|
|
|
|
v_int32 v_sum32 = vx_setzero_s32();
|
|
while (i < len0)
|
|
{
|
|
v_int16 v_sum16 = vx_setzero_s16();
|
|
int j = i;
|
|
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
|
{
|
|
v_int8 v_sum8 = vx_setzero_s8();
|
|
int k = j;
|
|
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
|
v_sum8 += v_one & v_pack(
|
|
v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero),
|
|
v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero)
|
|
);
|
|
v_int16 part1, part2;
|
|
v_expand(v_sum8, part1, part2);
|
|
v_sum16 += part1 + part2;
|
|
j = k;
|
|
}
|
|
v_int32 part1, part2;
|
|
v_expand(v_sum16, part1, part2);
|
|
v_sum32 += part1 + part2;
|
|
i = j;
|
|
}
|
|
nz = i - v_reduce_sum(v_sum32);
|
|
v_cleanup();
|
|
#endif
|
|
return nz + countNonZero_(src + i, len - i);
|
|
}
|
|
|
|
static int countNonZero32f( const float* src, int len )
|
|
{
|
|
int i = 0, nz = 0;
|
|
#if CV_SIMD
|
|
int len0 = len & -v_int8::nlanes;
|
|
v_float32 v_zero = vx_setzero_f32();
|
|
v_int8 v_one = vx_setall_s8(1);
|
|
|
|
v_int32 v_sum32 = vx_setzero_s32();
|
|
while (i < len0)
|
|
{
|
|
v_int16 v_sum16 = vx_setzero_s16();
|
|
int j = i;
|
|
while (j < std::min(len0, i + 32766 * v_int16::nlanes))
|
|
{
|
|
v_int8 v_sum8 = vx_setzero_s8();
|
|
int k = j;
|
|
for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes)
|
|
v_sum8 += v_one & v_pack(
|
|
v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)),
|
|
v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero))
|
|
);
|
|
v_int16 part1, part2;
|
|
v_expand(v_sum8, part1, part2);
|
|
v_sum16 += part1 + part2;
|
|
j = k;
|
|
}
|
|
v_int32 part1, part2;
|
|
v_expand(v_sum16, part1, part2);
|
|
v_sum32 += part1 + part2;
|
|
i = j;
|
|
}
|
|
nz = i - v_reduce_sum(v_sum32);
|
|
v_cleanup();
|
|
#endif
|
|
return nz + countNonZero_(src + i, len - i);
|
|
}
|
|
|
|
static int countNonZero64f( const double* src, int len )
|
|
{
|
|
int nz = 0, i = 0;
|
|
#if CV_SIMD_64F
|
|
v_int64 sum1 = vx_setzero_s64();
|
|
v_int64 sum2 = vx_setzero_s64();
|
|
v_float64 zero = vx_setzero_f64();
|
|
int step = v_float64::nlanes * 2;
|
|
int len0 = len & -step;
|
|
|
|
for(i = 0; i < len0; i += step )
|
|
{
|
|
sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero);
|
|
sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero);
|
|
}
|
|
|
|
// N.B the value is incremented by -1 (0xF...F) for each value
|
|
nz = i + (int)v_reduce_sum(sum1 + sum2);
|
|
v_cleanup();
|
|
#endif
|
|
return nz + countNonZero_(src + i, len - i);
|
|
}
|
|
|
|
// Returns the counting kernel for the given depth index, or 0 when none exists.
// Table layout by index: 0,1 -> 8-bit kernel; 2,3 -> 16-bit kernel; 4 -> 32-bit int;
// 5 -> float; 6 -> double; 7 -> no kernel. Two indices share each of the 8- and
// 16-bit kernels (presumably the unsigned/signed variants, since only "is it zero"
// matters — confirm against the depth constants).
// A depth outside the table yields 0 instead of reading out of bounds.
CountNonZeroFunc getCountNonZeroTab(int depth)
{
    static CountNonZeroFunc countNonZeroTab[] =
    {
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
    };

    const int tabSize = (int)(sizeof(countNonZeroTab) / sizeof(countNonZeroTab[0]));
    if (depth < 0 || depth >= tabSize)
        return (CountNonZeroFunc)0;  // unknown depth: report "no kernel" rather than index past the table

    return countNonZeroTab[depth];
}
|
|
|
|
#endif
|
|
|
|
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
|
} // namespace
|