mirror of
https://gitcode.com/gh_mirrors/es/esp32-opencv.git
synced 2025-08-14 01:57:43 +08:00
Started to optimize OpenCV for the ESP32
- Using float instead of double for floating point matrix multiplications (in core/src/matmul.simd.hpp) greatly reduces the computation time
This commit is contained in:
@ -202,6 +202,9 @@ endif()
|
||||
# ESP32 target option. Enabling it with -DESP32=ON disables/modifies parts of the code so that compilation works
|
||||
OCV_OPTION(ESP32 "Compilation for esp32 target" OFF)
|
||||
|
||||
# ESP32 optimization. Enabling it with -DESP32_OPTIMIZATION=ON uses code optimized for the esp32 target
|
||||
OCV_OPTION(ESP32_OPTIMIZATION "Optimization for the esp32 target" OFF)
|
||||
|
||||
OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)
|
||||
|
||||
# 3rd party libs
|
||||
@ -497,6 +500,12 @@ if(ESP32)
|
||||
add_definitions(-DESP32)
|
||||
endif()
|
||||
|
||||
if(ESP32_OPTIMIZATION)
|
||||
message(STATUS "Enabled ESP32 target optimized code")
|
||||
add_definitions(-DESP32_OPTIMIZATION)
|
||||
endif()
|
||||
|
||||
|
||||
if(ENABLE_IMPL_COLLECTION)
|
||||
add_definitions(-DCV_COLLECT_IMPL_DATA)
|
||||
endif()
|
||||
|
57
esp32/doc/optimization.md
Normal file
57
esp32/doc/optimization.md
Normal file
@ -0,0 +1,57 @@
|
||||
# Optimization
|
||||
|
||||
This doc details some optimizations done for OpenCV to run faster on the ESP32.
|
||||
|
||||
|
||||
|
||||
Activating optimization
|
||||
----------------------------
|
||||
|
||||
To activate the optimizations for the ESP32, the CMake parameter `-DESP32_OPTIMIZATION=ON` must be passed when configuring the build. All of the optimizations are disabled if this parameter is OFF.
|
||||
|
||||
|
||||
|
||||
## Floating point support
|
||||
|
||||
The ESP32 only has a single-precision Floating Point Unit (no double-precision support). Therefore, OpenCV functions using double types are very slow.
|
||||
|
||||
|
||||
|
||||
### Matrix multiplications
|
||||
|
||||
In files `core/matmul.dispatch.cpp` and `core/matmul.simd.hpp`.
|
||||
|
||||
Results when multiplying 100x6 * 6x100 matrices:
|
||||
|
||||
1. Initial test : 60 ms
|
||||
2. Changing `alpha` and `beta` from double to float in `GEMMsingleMult()` function: 12ms
|
||||
3. Changing `alpha` and `beta` from double to float in `gemmImpl()` function: 4.6ms
|
||||
|
||||
|
||||
|
||||
Results when multiplying 150x100 * 100x150 matrices:
|
||||
|
||||
1. Initial test: 2757ms
|
||||
2. Changing double in `GEMMStore()` function: 888ms
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Esp-dsp library
|
||||
|
||||
The ESP32 processor has the following hardware:
|
||||
|
||||
* 16/24-bit Instruction Set
|
||||
* Support for FPU (Floating Point Unit)
|
||||
* Support for DSP instructions
|
||||
* 32-bit integer multiplier
|
||||
* 32-bit integer divider
|
||||
* 40-bit MAC (Multiply-Accumulate)
|
||||
|
||||
|
||||
|
||||
The *esp-dsp* library (https://github.com/espressif/esp-dsp) provides functions written in assembly to use this hardware.
|
||||
|
||||
This section describes which esp-dsp functions are used, and where in OpenCV, for better performance.
|
||||
|
@ -16,7 +16,7 @@ TOOLCHAIN_CMAKE_PATH=$HOME/esp/esp-idf/tools/cmake/toolchain-esp32.cmake
|
||||
LIB_INSTALL_PATH=$SCRIPTDIR/../lib
|
||||
|
||||
# list of modules to compile
|
||||
OPENCV_MODULES_LIST=core,imgproc,imgcodecs
|
||||
OPENCV_MODULES_LIST=core,imgproc,imgcodecs,features2d,calib3d
|
||||
|
||||
echo "################################################################################"
|
||||
echo "######################## build_opencv_for_esp32 script #########################"
|
||||
@ -40,7 +40,7 @@ else
|
||||
fi
|
||||
|
||||
|
||||
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DESP32=ON -DBUILD_SHARED_LIBS=OFF -DCV_DISABLE_OPTIMIZATION=OFF -DWITH_IPP=OFF -DWITH_TBB=OFF -DWITH_OPENMP=OFF -DWITH_PTHREADS_PF=OFF -DWITH_QUIRC=OFF -DWITH_1394=OFF -DWITH_CUDA=OFF -DWITH_OPENCL=OFF -DWITH_OPENCLAMDFFT=OFF -DWITH_OPENCLAMDBLAS=OFF -DWITH_VA_INTEL=OFF -DWITH_EIGEN=OFF -DWITH_GSTREAMER=OFF -DWITH_GTK=OFF -DWITH_JASPER=OFF -DWITH_JPEG=OFF -DWITH_WEBP=OFF -DBUILD_ZLIB=ON -DBUILD_PNG=ON -DWITH_TIFF=OFF -DWITH_V4L=OFF -DWITH_LAPACK=OFF -DWITH_ITT=OFF -DWITH_PROTOBUF=OFF -DWITH_IMGCODEC_HDR=OFF -DWITH_IMGCODEC_SUNRASTER=OFF -DWITH_IMGCODEC_PXM=OFF -DWITH_IMGCODEC_PFM=OFF -DBUILD_LIST=${OPENCV_MODULES_LIST} -DBUILD_JAVA=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DBUILD_opencv_apps=OFF -DBUILD_PACKAGE=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_TRACE=OFF -DOPENCV_ENABLE_MEMALIGN=OFF -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_CMAKE_PATH}"
|
||||
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DESP32=ON -DESP32_OPTIMIZATION=ON -DBUILD_SHARED_LIBS=OFF -DCV_DISABLE_OPTIMIZATION=OFF -DWITH_IPP=OFF -DWITH_TBB=OFF -DWITH_OPENMP=OFF -DWITH_PTHREADS_PF=OFF -DWITH_QUIRC=OFF -DWITH_1394=OFF -DWITH_CUDA=OFF -DWITH_OPENCL=OFF -DWITH_OPENCLAMDFFT=OFF -DWITH_OPENCLAMDBLAS=OFF -DWITH_VA_INTEL=OFF -DWITH_EIGEN=OFF -DWITH_GSTREAMER=OFF -DWITH_GTK=OFF -DWITH_JASPER=OFF -DWITH_JPEG=OFF -DWITH_WEBP=OFF -DBUILD_ZLIB=ON -DBUILD_PNG=ON -DWITH_TIFF=OFF -DWITH_V4L=OFF -DWITH_LAPACK=OFF -DWITH_ITT=OFF -DWITH_PROTOBUF=OFF -DWITH_IMGCODEC_HDR=OFF -DWITH_IMGCODEC_SUNRASTER=OFF -DWITH_IMGCODEC_PXM=OFF -DWITH_IMGCODEC_PFM=OFF -DBUILD_LIST=${OPENCV_MODULES_LIST} -DBUILD_JAVA=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DBUILD_opencv_apps=OFF -DBUILD_PACKAGE=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_TRACE=OFF -DOPENCV_ENABLE_MEMALIGN=OFF -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_CMAKE_PATH}"
|
||||
|
||||
|
||||
### configure and build opencv ###
|
||||
@ -67,7 +67,7 @@ cp $SCRIPTDIR/resources/alloc_fix.cpp ./3rdparty/ade/ade-0.1.1f/sources/ade/sour
|
||||
echo "================================================================================"
|
||||
echo "Compiling with make -j"
|
||||
echo "================================================================================"
|
||||
make -j
|
||||
make -j3
|
||||
|
||||
### installing in output directory ###
|
||||
echo "================================================================================"
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user