mirror of
https://gitcode.com/gh_mirrors/es/esp32-opencv.git
synced 2025-08-06 18:24:38 +08:00
Started to optimize OpenCV for the ESP32
- Using float instead of double for floating-point matrix multiplications (in core/src/matmul.simd.hpp) greatly reduces the computation time
This commit is contained in:
@ -202,6 +202,9 @@ endif()
|
|||||||
# ESP32 target option. Enabling it with -DESP32=ON disables/modifies parts of the code so that compilation works
|
# ESP32 target option. Enabling it with -DESP32=ON disables/modifies parts of the code so that compilation works
|
||||||
OCV_OPTION(ESP32 "Compilation for esp32 target" OFF)
|
OCV_OPTION(ESP32 "Compilation for esp32 target" OFF)
|
||||||
|
|
||||||
|
# ESP32 optimization. Enabling it with -DESP32_OPTIMIZATION=ON uses code optimized for the esp32 target
|
||||||
|
OCV_OPTION(ESP32_OPTIMIZATION "Optimization for the esp32 target" OFF)
|
||||||
|
|
||||||
OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)
|
OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF)
|
||||||
|
|
||||||
# 3rd party libs
|
# 3rd party libs
|
||||||
@ -497,6 +500,12 @@ if(ESP32)
|
|||||||
add_definitions(-DESP32)
|
add_definitions(-DESP32)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(ESP32_OPTIMIZATION)
|
||||||
|
message(STATUS "Enabled ESP32 target optimized code")
|
||||||
|
add_definitions(-DESP32_OPTIMIZATION)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
if(ENABLE_IMPL_COLLECTION)
|
if(ENABLE_IMPL_COLLECTION)
|
||||||
add_definitions(-DCV_COLLECT_IMPL_DATA)
|
add_definitions(-DCV_COLLECT_IMPL_DATA)
|
||||||
endif()
|
endif()
|
||||||
|
57
esp32/doc/optimization.md
Normal file
57
esp32/doc/optimization.md
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# Optimization
|
||||||
|
|
||||||
|
This doc details some optimizations done for OpenCV to run faster on the ESP32.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Activating optimization
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
To activate the optimizations for the ESP32, pass the CMake parameter `-DESP32_OPTIMIZATION=ON`. All of the optimizations described here are disabled when this parameter is OFF.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Floating point support
|
||||||
|
|
||||||
|
The ESP32 only has a single-precision Floating Point Unit (no double precision), so double-precision operations have to be emulated in software. Therefore, OpenCV functions using `double` types are very slow.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Matrix multiplications
|
||||||
|
|
||||||
|
In files `core/src/matmul.dispatch.cpp` and `core/src/matmul.simd.hpp`.
|
||||||
|
|
||||||
|
Results by multiplying 100x6 * 6x100 matrices:
|
||||||
|
|
||||||
|
1. Initial test: 60ms
|
||||||
|
2. Changing `alpha` and `beta` from double to float in the `GEMMSingleMul()` function: 12ms
|
||||||
|
3. Changing `alpha` and `beta` from double to float in the `gemmImpl()` function: 4.6ms
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Results by multiplying 150x100 * 100x150 matrices:
|
||||||
|
|
||||||
|
1. Initial test: 2757ms
|
||||||
|
2. Changing double to float in the `GEMMStore()` function: 888ms
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Esp-dsp library
|
||||||
|
|
||||||
|
The ESP32 processor has the following hardware:
|
||||||
|
|
||||||
|
* 16/24-bit Instruction Set
|
||||||
|
* Support for FPU (Floating Point Unit)
|
||||||
|
* Support for DSP instructions
|
||||||
|
* 32-bit integer multiplier
|
||||||
|
* 32-bit integer divider
|
||||||
|
* 40-bit MAC (Multiply-Accumulate)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
The *esp-dsp* library (https://github.com/espressif/esp-dsp) provides functions written in assembly to use this hardware.
|
||||||
|
|
||||||
|
This section describes which esp-dsp functions are used, and where in OpenCV they are used, to improve performance.
|
||||||
|
|
@ -16,7 +16,7 @@ TOOLCHAIN_CMAKE_PATH=$HOME/esp/esp-idf/tools/cmake/toolchain-esp32.cmake
|
|||||||
LIB_INSTALL_PATH=$SCRIPTDIR/../lib
|
LIB_INSTALL_PATH=$SCRIPTDIR/../lib
|
||||||
|
|
||||||
# list of modules to compile
|
# list of modules to compile
|
||||||
OPENCV_MODULES_LIST=core,imgproc,imgcodecs
|
OPENCV_MODULES_LIST=core,imgproc,imgcodecs,features2d,calib3d
|
||||||
|
|
||||||
echo "################################################################################"
|
echo "################################################################################"
|
||||||
echo "######################## build_opencv_for_esp32 script #########################"
|
echo "######################## build_opencv_for_esp32 script #########################"
|
||||||
@ -40,7 +40,7 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DESP32=ON -DBUILD_SHARED_LIBS=OFF -DCV_DISABLE_OPTIMIZATION=OFF -DWITH_IPP=OFF -DWITH_TBB=OFF -DWITH_OPENMP=OFF -DWITH_PTHREADS_PF=OFF -DWITH_QUIRC=OFF -DWITH_1394=OFF -DWITH_CUDA=OFF -DWITH_OPENCL=OFF -DWITH_OPENCLAMDFFT=OFF -DWITH_OPENCLAMDBLAS=OFF -DWITH_VA_INTEL=OFF -DWITH_EIGEN=OFF -DWITH_GSTREAMER=OFF -DWITH_GTK=OFF -DWITH_JASPER=OFF -DWITH_JPEG=OFF -DWITH_WEBP=OFF -DBUILD_ZLIB=ON -DBUILD_PNG=ON -DWITH_TIFF=OFF -DWITH_V4L=OFF -DWITH_LAPACK=OFF -DWITH_ITT=OFF -DWITH_PROTOBUF=OFF -DWITH_IMGCODEC_HDR=OFF -DWITH_IMGCODEC_SUNRASTER=OFF -DWITH_IMGCODEC_PXM=OFF -DWITH_IMGCODEC_PFM=OFF -DBUILD_LIST=${OPENCV_MODULES_LIST} -DBUILD_JAVA=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DBUILD_opencv_apps=OFF -DBUILD_PACKAGE=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_TRACE=OFF -DOPENCV_ENABLE_MEMALIGN=OFF -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_CMAKE_PATH}"
|
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DESP32=ON -DESP32_OPTIMIZATION=ON -DBUILD_SHARED_LIBS=OFF -DCV_DISABLE_OPTIMIZATION=OFF -DWITH_IPP=OFF -DWITH_TBB=OFF -DWITH_OPENMP=OFF -DWITH_PTHREADS_PF=OFF -DWITH_QUIRC=OFF -DWITH_1394=OFF -DWITH_CUDA=OFF -DWITH_OPENCL=OFF -DWITH_OPENCLAMDFFT=OFF -DWITH_OPENCLAMDBLAS=OFF -DWITH_VA_INTEL=OFF -DWITH_EIGEN=OFF -DWITH_GSTREAMER=OFF -DWITH_GTK=OFF -DWITH_JASPER=OFF -DWITH_JPEG=OFF -DWITH_WEBP=OFF -DBUILD_ZLIB=ON -DBUILD_PNG=ON -DWITH_TIFF=OFF -DWITH_V4L=OFF -DWITH_LAPACK=OFF -DWITH_ITT=OFF -DWITH_PROTOBUF=OFF -DWITH_IMGCODEC_HDR=OFF -DWITH_IMGCODEC_SUNRASTER=OFF -DWITH_IMGCODEC_PXM=OFF -DWITH_IMGCODEC_PFM=OFF -DBUILD_LIST=${OPENCV_MODULES_LIST} -DBUILD_JAVA=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DBUILD_opencv_apps=OFF -DBUILD_PACKAGE=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_TRACE=OFF -DOPENCV_ENABLE_MEMALIGN=OFF -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_CMAKE_PATH}"
|
||||||
|
|
||||||
|
|
||||||
### configure and build opencv ###
|
### configure and build opencv ###
|
||||||
@ -67,7 +67,7 @@ cp $SCRIPTDIR/resources/alloc_fix.cpp ./3rdparty/ade/ade-0.1.1f/sources/ade/sour
|
|||||||
echo "================================================================================"
|
echo "================================================================================"
|
||||||
echo "Compiling with make -j"
|
echo "Compiling with make -j"
|
||||||
echo "================================================================================"
|
echo "================================================================================"
|
||||||
make -j
|
make -j3
|
||||||
|
|
||||||
### installing in output directory ###
|
### installing in output directory ###
|
||||||
echo "================================================================================"
|
echo "================================================================================"
|
||||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user