Compare commits
52 Commits
main
...
feature-ro
Author | SHA1 | Date | |
---|---|---|---|
ad95fac40c | |||
699169140a | |||
5e02244e93 | |||
1a49d466fe | |||
a9d69311a8 | |||
11517dad5c | |||
5035233913 | |||
2d467ebbef | |||
ca1e8149a1 | |||
1749a2c720 | |||
bcdade9daa | |||
cfdb84093d | |||
31c51d77ba | |||
e5cf60e254 | |||
69226dd197 | |||
d9a50a40d1 | |||
e6f440006b | |||
fa61990d3c | |||
b891c34176 | |||
d119d45722 | |||
25f0b54a5a | |||
4b26673655 | |||
f6ee887e20 | |||
9bf943b8d1 | |||
3c3b0d4632 | |||
d7afc907c2 | |||
1b2286100e | |||
82320082fa | |||
59bbc596bb | |||
b041883bca | |||
b4042ca7cf | |||
c879408141 | |||
d8d888c045 | |||
a0f53bc38e | |||
7346fc74b9 | |||
93220b592e | |||
4708a02218 | |||
88fdb73ae2 | |||
a19993a747 | |||
7d7cba4a4c | |||
df05d073c7 | |||
bf6fb79131 | |||
18a73abd03 | |||
e54921809c | |||
34e8f8bd67 | |||
e0202a0fd9 | |||
199ea46bb1 | |||
f794aeb840 | |||
1efe5f11ac | |||
1081c707af | |||
489d5428e3 | |||
dea266853c |
@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
project(match)
|
||||
find_package(OpenCV 4.8 REQUIRED)
|
||||
|
||||
find_package(OpenCV REQUIRED)
|
||||
|
||||
option(ENABLE_OPENMP "enable openmp" OFF)
|
||||
if(ENABLE_OPENMP)
|
||||
@ -21,28 +21,24 @@ endif(ENABLE_OPENMP)
|
||||
add_library(algo SHARED
|
||||
grayMatch.h
|
||||
grayMatch.cpp
|
||||
serialize.cpp
|
||||
sum.h
|
||||
sum.cpp
|
||||
privateType.h
|
||||
apiExport.h
|
||||
integral.h
|
||||
integral.cpp
|
||||
)
|
||||
|
||||
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
|
||||
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
|
||||
target_compile_options(algo PRIVATE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wl,--exclude-libs,ALL -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
|
||||
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
|
||||
#$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Darwin>>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
|
||||
)
|
||||
target_compile_definitions(algo PUBLIC API_EXPORTS
|
||||
target_compile_definitions(algo PRIVATE API_EXPORTS
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
|
||||
)
|
||||
|
||||
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
||||
#==============================================================
|
||||
#exe
|
||||
#==============================================================
|
||||
@ -58,3 +54,5 @@ target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
|
||||
)
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
|
||||
|
||||
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
24
LICENSE
24
LICENSE
@ -1,24 +0,0 @@
|
||||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2024, SurfaceMan
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
25
README.md
25
README.md
@ -1,20 +1,13 @@
|
||||
# Template match with gray model(ncc)
|
||||
|
||||
## Note: branch [feature-rotate-model](https://github.com/SurfaceMan/gray_match/tree/feature-rotate-model) method 2x faster at matching!
|
||||
|
||||
## highlights:
|
||||
1. the original code is based on [Fastest_Image_Pattern_Matching](https://github.com/DennisLiu1993/Fastest_Image_Pattern_Matching); you can check out tag [v1.0](https://github.com/SurfaceMan/gray_match/releases/tag/v1.0) for more details.
|
||||
2. refactored the simd match process with opencv [Universal intrinsics](https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html); it has been tested on x86_64(sse), arm(neon) and LoongArch(lsx).
|
||||
3. support model save/load as binary file
|
||||
4. provide pure c interface
|
||||
5. support openmp
|
||||
6. position with 3x3 subpixel interpolation
|
||||
|
||||
## usage:
|
||||
all you need can be found in [main.cpp](main.cpp)
|
||||
|
||||
## gallery:
|
||||

|
||||
## rotate-model vs main branch
|
||||
|
||||
|
||||
|
||||
| method | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
|
||||
|-----------|----------|------------------|---------------------------|
|
||||
| train | 1 | 680 | 1/680 |
|
||||
| match | 31 | 16 | 2 |
|
||||
| train-omp | 1 | 160 | 1/160 |
|
||||
| match-omp | 12 | 6 | 2 |
|
||||
|
||||
result: the main branch method trains the model really fast (1ms) and is parameter-free; the rotate-model method is 2x faster at matching.
|
1
TODO.md
1
TODO.md
@ -1 +0,0 @@
|
||||
1. opencv intrinsics api [changed](https://github.com/opencv/opencv/pull/24371) since 4.9, need refactor
|
1086
grayMatch.cpp
1086
grayMatch.cpp
File diff suppressed because it is too large
Load Diff
98
grayMatch.h
98
grayMatch.h
@ -1,12 +1,12 @@
|
||||
#ifndef GRAY_MATCH_H
|
||||
#define GRAY_MATCH_H
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include "apiExport.h"
|
||||
|
||||
struct Model;
|
||||
|
||||
using Model_t = Model *;
|
||||
|
||||
struct Pose {
|
||||
float x;
|
||||
float y;
|
||||
@ -14,93 +14,11 @@ struct Pose {
|
||||
float score;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief train match model
|
||||
* @param data image data
|
||||
* @param width image width
|
||||
* @param height image height
|
||||
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
|
||||
* @param bytesPerLine bytes per line
|
||||
* @param roiLeft rectangle roi left
|
||||
* @param roiTop rectangle roi top
|
||||
* @param roiWidth rectangle roi width
|
||||
* @param roiHeight rectangle roi height
|
||||
* @param levelNum pyramid levels (> 0:user setting,-1:auto)
|
||||
* @return
|
||||
*/
|
||||
API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height, int channels,
|
||||
int bytesPerLine, int roiLeft, int roiTop, int roiWidth,
|
||||
int roiHeight, int levelNum);
|
||||
/**
|
||||
* @brief match model
|
||||
* @param data image data
|
||||
* @param width image width
|
||||
* @param height image height
|
||||
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
|
||||
* @param bytesPerLine bytes per line
|
||||
* @param roiLeft rectangle roi left
|
||||
* @param roiTop rectangle roi top
|
||||
* @param roiWidth rectangle roi width
|
||||
* @param roiHeight rectangle roi height
|
||||
* @param model trained model
|
||||
* @param count in(max detect count)/out(found count)
|
||||
* @param poses pose array inited with size not less than count
|
||||
* @param level match start at which level (level>=0 && level<modelLevel-1,-1:auto)
|
||||
* @param startAngle rotation start angle
|
||||
* @param spanAngle rotation angle range
|
||||
* @param maxOverlap overlap threshold
|
||||
* @param minScore minimum matched score
|
||||
* @param subpixel compute subpixel result
|
||||
* @return
|
||||
*/
|
||||
API_PUBLIC void matchModel(const unsigned char *data, int width, int height, int channels,
|
||||
int bytesPerLine, int roiLeft, int roiTop, int roiWidth, int roiHeight,
|
||||
Model_t model, int *count, Pose *poses, int level, double startAngle,
|
||||
double spanAngle, double maxOverlap, double minScore, int subpixel);
|
||||
API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
|
||||
double angleStep);
|
||||
|
||||
/**
|
||||
* @brief get trained model levels
|
||||
* @param model
|
||||
* @return pyramid level
|
||||
*/
|
||||
API_PUBLIC int modelLevel(Model_t model);
|
||||
API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
|
||||
int level, double startAngle, double spanAngle, double maxOverlap,
|
||||
double minScore, int subpixel);
|
||||
|
||||
/**
|
||||
* @brief get trained model image
|
||||
* @param model
|
||||
* @param level pyramid level index(level>=0 && level<modelLevel-1)
|
||||
* @param data image data buffer(need allocated), can input nullptr to query width/height/channels
|
||||
* @param length buffer length not less than width*height*channels
|
||||
* @param width image width, can input nullptr
|
||||
* @param height image height, can input nullptr
|
||||
* @param channels image channels, can input nullptr
|
||||
* @return
|
||||
*/
|
||||
API_PUBLIC void modelImage(Model_t model, int level, unsigned char *data, int length, int *width,
|
||||
int *height, int *channels);
|
||||
|
||||
/**
|
||||
* @brief free model
|
||||
* @param model
|
||||
* @return
|
||||
*/
|
||||
API_PUBLIC void freeModel(Model_t *model);
|
||||
|
||||
/**
|
||||
* @brief serialize model to buffer
|
||||
* @param model
|
||||
* @param buffer need allocated, can input nullptr to query size
|
||||
* @param size in(buffer size)/out(written size)
|
||||
* @return true(success)false(failed)
|
||||
*/
|
||||
API_PUBLIC bool serialize(Model_t model, unsigned char *buffer, int *size);
|
||||
|
||||
/**
|
||||
* @brief deserialize model
|
||||
* @param buffer
|
||||
* @param size buffer size
|
||||
* @return model
|
||||
*/
|
||||
API_PUBLIC Model_t deserialize(unsigned char *buffer, int size);
|
||||
|
||||
#endif // GRAY_MATCH_H
|
||||
#endif // GRAY_MATCH_H
|
BIN
img/model3.png
Normal file
BIN
img/model3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
img/model3_src2.png
Normal file
BIN
img/model3_src2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 336 KiB |
BIN
img/result.png
BIN
img/result.png
Binary file not shown.
Before Width: | Height: | Size: 374 KiB |
180
integral.cpp
180
integral.cpp
@ -1,180 +0,0 @@
|
||||
#include "integral.h"
|
||||
#include "privateType.h"
|
||||
|
||||
#include <opencv2/core/hal/intrin.hpp>
|
||||
|
||||
/**
 * @brief Convert a vector of signed 32-bit lanes into two double vectors.
 * @param src  integer source vector
 * @param low  receives the result of cv::v_cvt_f64(src)
 * @param high receives the result of cv::v_cvt_f64_high(src)
 */
inline void expand(const cv::v_int32 &src, cv::v_float64 &low, cv::v_float64 &high) {
    high = cv::v_cvt_f64_high(src);
    low  = cv::v_cvt_f64(src);
}
|
||||
|
||||
/**
 * @brief Integral-image step for one vector of 16-bit values.
 *
 * Builds a running sum across the lanes of src, adds the carry from the
 * preceding vectors of the same row (pre), then adds the previous row of the
 * table (prevDst) and stores the result as doubles in dst.
 *
 * @param src     source values of the current row
 * @param dst     output row; four consecutive double vectors are written
 * @param prevDst previous output row, same offset as dst
 * @param pre     in/out carry: on return every lane holds the last running sum
 *
 * NOTE(review): the shift-by-1/2/4 ladder presumably assumes 8 lanes per
 * cv::v_uint16 (128-bit registers) - confirm for wider SIMD builds.
 */
inline void integralSum(const cv::v_uint16 &src, double *dst, const double *prevDst,
                        cv::v_uint32 &pre) {
    constexpr int lanes = simdSize(cv::v_float64);

    // running sum inside the register: add copies shifted by 1, 2 and 4 lanes
    auto running = cv::v_add(src, cv::v_rotate_left<1>(src));
    running      = cv::v_add(running, cv::v_rotate_left<2>(running));
    running      = cv::v_add(running, cv::v_rotate_left<4>(running));

    // widen to 32 bits and add the carry of the row so far
    cv::v_uint32 lower;
    cv::v_uint32 upper;
    cv::v_expand(running, lower, upper);
    lower = cv::v_add(lower, pre);
    upper = cv::v_add(upper, pre);

    // the last lane becomes the carry for the next vector
    pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(upper));

    cv::v_float64 first;
    cv::v_float64 second;

    expand(cv::v_reinterpret_as_s32(lower), first, second);
    cv::v_store(dst, cv::v_add(first, cv::v_load(prevDst)));
    cv::v_store(dst + lanes, cv::v_add(second, cv::v_load(prevDst + lanes)));

    expand(cv::v_reinterpret_as_s32(upper), first, second);
    cv::v_store(dst + lanes * 2, cv::v_add(first, cv::v_load(prevDst + lanes * 2)));
    cv::v_store(dst + lanes * 3, cv::v_add(second, cv::v_load(prevDst + lanes * 3)));
}
|
||||
|
||||
/**
 * @brief Integral-image step for one vector of 16-bit values, accumulated in
 *        32-bit lanes.
 *
 * Same contract as the single-vector integralSum, but the lanes are widened
 * to 32 bits before the running sum is formed.
 *
 * @param src     source values of the current row (the two-vector overload
 *                passes squared pixels here)
 * @param dst     output row; four consecutive double vectors are written
 * @param prevDst previous output row, same offset as dst
 * @param pre     in/out carry: on return every lane holds the last running sum
 *
 * NOTE(review): the lane shuffles presumably assume 8 lanes per cv::v_uint16
 * and 4 lanes per cv::v_uint32 - confirm for wider SIMD builds.
 */
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
    constexpr int lanes = simdSize(cv::v_float64);

    cv::v_uint32 lower;
    cv::v_uint32 upper;
    cv::v_expand(src, lower, upper);

    // step 1: add the neighbour one lane to the left (shift done in 16-bit domain)
    cv::v_uint32 shiftedLower;
    cv::v_uint32 shiftedUpper;
    cv::v_expand(cv::v_rotate_left<1>(src), shiftedLower, shiftedUpper);
    lower = cv::v_add(lower, shiftedLower);
    upper = cv::v_add(upper, shiftedUpper);

    // step 2: add the pair sums two lanes to the left; the upper half pulls
    // its left neighbours across the lower/upper boundary
    upper = cv::v_add(upper, cv::v_extract<2>(lower, upper));
    lower = cv::v_add(lower, cv::v_rotate_left<2>(lower));

    // step 3: carry from earlier vectors, then lower half into the upper half
    lower = cv::v_add(lower, pre);
    upper = cv::v_add(upper, lower);

    pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(upper));

    cv::v_float64 first;
    cv::v_float64 second;

    expand(cv::v_reinterpret_as_s32(lower), first, second);
    cv::v_store(dst, cv::v_add(first, cv::v_load(prevDst)));
    cv::v_store(dst + lanes, cv::v_add(second, cv::v_load(prevDst + lanes)));

    expand(cv::v_reinterpret_as_s32(upper), first, second);
    cv::v_store(dst + lanes * 2, cv::v_add(first, cv::v_load(prevDst + lanes * 2)));
    cv::v_store(dst + lanes * 3, cv::v_add(second, cv::v_load(prevDst + lanes * 3)));
}
|
||||
|
||||
/*
|
||||
inline void integralSqSum(cv::v_uint32 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
|
||||
src += cv::v_rotate_left<1>(src);
|
||||
src += cv::v_rotate_left<2>(src);
|
||||
src += pre;
|
||||
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(src));
|
||||
|
||||
cv::v_float64 v1;
|
||||
cv::v_float64 v2;
|
||||
expand(cv::v_reinterpret_as_s32(src), v1, v2);
|
||||
|
||||
cv::v_store(dst, v1 + cv::v_load(prevDst));
|
||||
cv::v_store(dst + simdSize(cv::v_float64), v2 + cv::v_load(prevDst +
|
||||
simdSize(cv::v_float64)));
|
||||
}
|
||||
|
||||
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
|
||||
cv::v_uint32 v1;
|
||||
cv::v_uint32 v2;
|
||||
cv::v_expand(src, v1, v2);
|
||||
integralSqSum(v1, dst, prevDst, pre);
|
||||
integralSqSum(v2, dst + simdSize(cv::v_uint32), prevDst + simdSize(cv::v_uint32),
|
||||
pre);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * @brief Run the single-vector integralSum on two consecutive vectors.
 * @param v1      first vector of source values
 * @param v2      second vector, stored simdSize(cv::v_uint16) elements later
 * @param dst     output row position for v1
 * @param prevDst previous output row, same offset as dst
 * @param pre     in/out carry threaded through both calls
 */
inline void integralSum(const cv::v_uint16 &v1, const cv::v_uint16 &v2, double *dst,
                        const double *prevDst, cv::v_uint32 &pre) {
    constexpr int offset = simdSize(cv::v_uint16);

    integralSum(v1, dst, prevDst, pre);
    integralSum(v2, dst + offset, prevDst + offset, pre);
}
|
||||
|
||||
/**
 * @brief Square two consecutive vectors in place and feed them to the
 *        single-vector integralSqSum.
 * @param v1      first vector; overwritten with its lane-wise square
 * @param v2      second vector; overwritten with its lane-wise square
 * @param dst     output row position for v1
 * @param prevDst previous output row, same offset as dst
 * @param pre     in/out carry threaded through both calls
 */
inline void integralSqSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
                          cv::v_uint32 &pre) {
    constexpr int offset = simdSize(cv::v_uint16);

    // v_mul_wrap keeps the low 16 bits of each product
    v1 = cv::v_mul_wrap(v1, v1);
    v2 = cv::v_mul_wrap(v2, v2);

    integralSqSum(v1, dst, prevDst, pre);
    integralSqSum(v2, dst + offset, prevDst + offset, pre);
}
|
||||
|
||||
/**
 * @brief Compute the integral image and squared integral image of src.
 *
 * Both outputs are (re)allocated as CV_64FC1 with one extra row and column;
 * row 0 and column 0 are zero. Full SIMD blocks of each row are processed
 * first, the remaining columns are finished with scalar code.
 *
 * @param src   source image; read as one byte per pixel
 *              (NOTE(review): presumably CV_8UC1 - not checked here)
 * @param sum   receives the integral image
 * @param sqSum receives the integral image of squared pixels
 */
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum) {
    const auto size = src.size() + cv::Size(1, 1);
    sum.create(size, CV_64FC1);
    sqSum.create(size, CV_64FC1);

    // zero border row
    memset(sum.data, 0, sum.step[ 0 ]);
    memset(sqSum.data, 0, sqSum.step[ 0 ]);

    const auto *srcStart = src.data;
    const auto  srcStep  = src.step[ 0 ];

    // table pointers start at row 1 / column 1, i.e. past the zero border
    auto      *sumStart   = reinterpret_cast<double *>(sum.data) + sum.step1() + 1;
    const auto sumStep    = sum.step[ 0 ] / sum.step[ 1 ];
    auto      *sqSumStart = reinterpret_cast<double *>(sqSum.data) + sqSum.step1() + 1;
    const auto sqSumStep  = sqSum.step[ 0 ] / sqSum.step[ 1 ];

    // x < end  <=>  a whole SIMD block starting at x fits into the source row
    const auto end = size.width - simdSize(cv::v_uint8);

    for (int y = 0; y < src.rows; y++) {
        const auto *srcPtr = srcStart + srcStep * y;
        auto       *sumPtr = sumStart + sumStep * y;
        // previous row derived by subtraction: multiplying the unsigned step
        // by (y - 1) would wrap around for y == 0
        const auto *preSumPtr = sumPtr - sumStep;
        sumPtr[ -1 ]          = 0; // zero border column

        cv::v_uint32 prevSum = cv::vx_setzero_u32();
        for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
            cv::v_uint16 v1;
            cv::v_uint16 v2;
            cv::v_expand(cv::v_load(srcPtr + x), v1, v2);

            integralSum(v1, v2, sumPtr + x, preSumPtr + x, prevSum);
        }
    }

    for (int y = 0; y < src.rows; y++) {
        const auto *srcPtr      = srcStart + srcStep * y;
        auto       *sqSumPtr    = sqSumStart + sqSumStep * y;
        auto       *preSqSumPtr = sqSumPtr - sqSumStep;
        sqSumPtr[ -1 ]          = 0; // zero border column

        cv::v_uint32 prevSqSum = cv::vx_setzero_u32();
        for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
            cv::v_uint16 v1;
            cv::v_uint16 v2;
            cv::v_expand(cv::v_load(srcPtr + x), v1, v2);

            integralSqSum(v1, v2, sqSumPtr + x, preSqSumPtr + x, prevSqSum);
        }
    }

    // scalar tail: columns not covered by a full SIMD block
    const auto start = src.cols - src.cols % simdSize(cv::v_uint8);
    for (int y = 0; y < src.rows; y++) {
        const auto *srcPtr      = srcStart + srcStep * y;
        auto       *sumPtr      = sumStart + sumStep * y;
        auto       *sqSumPtr    = sqSumStart + sqSumStep * y;
        const auto *preSumPtr   = sumPtr - sumStep;
        const auto *preSqSumPtr = sqSumPtr - sqSumStep;

        for (int x = start; x < src.cols; x++) {
            const auto val   = srcPtr[ x ];
            const auto sqVal = val * val;

            sumPtr[ x ]   = sumPtr[ x - 1 ] + val + preSumPtr[ x ] - preSumPtr[ x - 1 ];
            sqSumPtr[ x ] = sqSumPtr[ x - 1 ] + sqVal + preSqSumPtr[ x ] - preSqSumPtr[ x - 1 ];
        }
    }
}
|
@ -1,8 +0,0 @@
|
||||
#ifndef INTEGRAL_H
|
||||
#define INTEGRAL_H
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum);
|
||||
|
||||
#endif // INTEGRAL_H
|
72
main.cpp
72
main.cpp
@ -1,9 +1,9 @@
|
||||
#include "grayMatch.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <string>
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
const std::string keys = "{model m || model image}"
|
||||
@ -24,8 +24,8 @@ int main(int argc, const char *argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto srcFile = std::string(IMG_DIR) + "/3.bmp";
|
||||
auto dstFile = std::string(IMG_DIR) + "/h.bmp";
|
||||
auto srcFile = std::string(IMG_DIR) + "/model3.png";
|
||||
auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
|
||||
if (cmd.has("model"))
|
||||
srcFile = cmd.get<std::string>("model");
|
||||
if (cmd.has("scene"))
|
||||
@ -37,66 +37,19 @@ int main(int argc, const char *argv[]) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const std::string modelName("model.bin");
|
||||
{
|
||||
auto t0 = cv::getTickCount();
|
||||
auto model = trainModel(src.data, src.cols, src.rows, src.channels(),
|
||||
static_cast<int>(src.step), 0, 0, src.cols, src.rows, -1);
|
||||
auto t1 = cv::getTickCount();
|
||||
|
||||
// get size
|
||||
int size;
|
||||
serialize(model, nullptr, &size);
|
||||
|
||||
// serialize to buffer
|
||||
std::vector<uchar> buffer(size);
|
||||
serialize(model, buffer.data(), &size);
|
||||
|
||||
// write to file
|
||||
std::ofstream ofs(modelName, std::ios::binary | std::ios::out);
|
||||
if (!ofs.is_open()) {
|
||||
return -1;
|
||||
}
|
||||
ofs.write(reinterpret_cast<const char *>(buffer.data()), size);
|
||||
|
||||
freeModel(&model);
|
||||
|
||||
auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
|
||||
std::cout << "train(s):" << trainCost << std::endl;
|
||||
}
|
||||
|
||||
int count = 70;
|
||||
std::vector<Pose> poses(count);
|
||||
Model_t model;
|
||||
auto score = cmd.get<float>("threshold");
|
||||
{
|
||||
// open file
|
||||
std::ifstream ifs(modelName, std::ios::binary | std::ios::in);
|
||||
if (!ifs.is_open()) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
// get size
|
||||
ifs.seekg(0, std::ios::end);
|
||||
auto size = ifs.tellg();
|
||||
ifs.seekg(0, std::ios::beg);
|
||||
|
||||
// read to buffer
|
||||
std::vector<uchar> buffer(size);
|
||||
ifs.read(reinterpret_cast<char *>(buffer.data()), size);
|
||||
|
||||
// deserialize from buffer
|
||||
model = deserialize(buffer.data(), static_cast<int>(buffer.size()));
|
||||
|
||||
auto t2 = cv::getTickCount();
|
||||
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0, 0,
|
||||
dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
auto t3 = cv::getTickCount();
|
||||
|
||||
auto matchCost = static_cast<double>(t3 - t2) / cv::getTickFrequency();
|
||||
std::cout << "match(s):" << matchCost << std::endl;
|
||||
}
|
||||
auto t0 = cv::getTickCount();
|
||||
auto model = trainModel(src, -1, 0, 360, -1);
|
||||
auto t1 = cv::getTickCount();
|
||||
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
auto t2 = cv::getTickCount();
|
||||
|
||||
const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
|
||||
const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
|
||||
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
|
||||
for (int i = 0; i < count; i++) {
|
||||
const auto &pose = poses[ i ];
|
||||
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
|
||||
@ -107,8 +60,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
auto start = cv::getTickCount();
|
||||
for (int i = 0; i < times; i++) {
|
||||
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0,
|
||||
0, dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
count = 70;
|
||||
}
|
||||
auto end = cv::getTickCount();
|
||||
|
@ -2,40 +2,49 @@
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
struct HRLE {
|
||||
int row = -1;
|
||||
int startColumn = -1;
|
||||
int length = 0;
|
||||
};
|
||||
|
||||
struct VRLE {
|
||||
int col = -1;
|
||||
int startRow = -1;
|
||||
int length = 0;
|
||||
};
|
||||
|
||||
using HRegion = std::vector<HRLE>;
|
||||
using VRegion = std::vector<VRLE>;
|
||||
|
||||
struct Template {
|
||||
cv::Mat img;
|
||||
HRegion hRegion;
|
||||
VRegion vRegion;
|
||||
cv::RotatedRect rect;
|
||||
|
||||
double mean = 0;
|
||||
double normal = 0;
|
||||
double invArea = 0;
|
||||
};
|
||||
|
||||
struct Layer {
|
||||
double angleStep = 0;
|
||||
|
||||
std::vector<Template> templates;
|
||||
};
|
||||
|
||||
struct Model {
|
||||
std::vector<cv::Mat> pyramids;
|
||||
std::vector<cv::Scalar> mean;
|
||||
std::vector<double> normal;
|
||||
std::vector<double> invArea;
|
||||
std::vector<uchar> equal1;
|
||||
uchar borderColor = 0;
|
||||
double startAngle = 0;
|
||||
double stopAngle = 0;
|
||||
double angleStep = 0;
|
||||
|
||||
void clear() {
|
||||
pyramids.clear();
|
||||
normal.clear();
|
||||
invArea.clear();
|
||||
mean.clear();
|
||||
equal1.clear();
|
||||
}
|
||||
|
||||
void resize(const std::size_t size) {
|
||||
normal.resize(size);
|
||||
invArea.resize(size);
|
||||
mean.resize(size);
|
||||
equal1.resize(size);
|
||||
}
|
||||
|
||||
void reserve(const std::size_t size) {
|
||||
pyramids.reserve(size);
|
||||
normal.reserve(size);
|
||||
invArea.reserve(size);
|
||||
mean.reserve(size);
|
||||
equal1.reserve(size);
|
||||
}
|
||||
cv::Size srcSize;
|
||||
std::vector<Layer> layers;
|
||||
};
|
||||
|
||||
#if CV_VERSION_MAJOR >= 4 && CV_VERSION_MINOR >= 8
|
||||
#define simdSize(type) cv::VTraits<type>::nlanes
|
||||
#else
|
||||
#define simdSize(type) type::nlanes
|
||||
#endif
|
||||
#endif
|
228
serialize.cpp
228
serialize.cpp
@ -1,228 +0,0 @@
|
||||
#include "grayMatch.h"
|
||||
#include "privateType.h"
|
||||
|
||||
#include <opencv2/core/hal/intrin.hpp>
|
||||
|
||||
class Buffer {
|
||||
public:
|
||||
Buffer(const int size_, unsigned char *data_)
|
||||
: m_size(size_)
|
||||
, m_data(data_) {}
|
||||
|
||||
virtual ~Buffer() = default;
|
||||
|
||||
virtual void operator&(uchar &val) = 0;
|
||||
virtual void operator&(std::vector<cv::Mat> &val) = 0;
|
||||
virtual void operator&(std::vector<cv::Scalar> &val) = 0;
|
||||
virtual void operator&(std::vector<double> &val) = 0;
|
||||
virtual void operator&(std::vector<uchar> &val) = 0;
|
||||
|
||||
void operator&(Model &val) {
|
||||
this->operator&(val.pyramids);
|
||||
this->operator&(val.mean);
|
||||
this->operator&(val.normal);
|
||||
this->operator&(val.invArea);
|
||||
this->operator&(val.equal1);
|
||||
this->operator&(val.borderColor);
|
||||
}
|
||||
|
||||
[[nodiscard]] int count() const {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
protected:
|
||||
int m_size = 0;
|
||||
unsigned char *m_data = nullptr;
|
||||
};
|
||||
|
||||
/// Write callback that copies `size` bytes from src to dst.
void binWrite(void *const dst, const void *src, const int size) {
    memcpy(dst, src, static_cast<size_t>(size));
}
|
||||
|
||||
/// Write callback that does nothing; all arguments are ignored.
void fakeWrite(void *const /*dst*/, const void * /*src*/, const int /*size*/) {}
|
||||
|
||||
using Write = void (*)(void *, const void *, int);
|
||||
|
||||
template <Write write> class OutBuffer final : public Buffer {
|
||||
public:
|
||||
explicit OutBuffer(unsigned char *const data_)
|
||||
: Buffer(0, data_) {}
|
||||
|
||||
void operator&(uchar &val) override {
|
||||
write(m_data + m_size, &val, sizeof(val));
|
||||
m_size += static_cast<int>(sizeof(val));
|
||||
}
|
||||
void operator&(std::vector<cv::Mat> &val) override {
|
||||
const int size = static_cast<int>(val.size());
|
||||
write(m_data + m_size, &size, sizeof(size));
|
||||
m_size += static_cast<int>(sizeof(size));
|
||||
|
||||
for (auto &element : val) {
|
||||
writeElement(element);
|
||||
}
|
||||
}
|
||||
void writeElement(cv::Mat &val) {
|
||||
write(m_data + m_size, &val.cols, sizeof(int));
|
||||
m_size += static_cast<int>(sizeof(int));
|
||||
|
||||
write(m_data + m_size, &val.rows, sizeof(int));
|
||||
m_size += static_cast<int>(sizeof(int));
|
||||
|
||||
for (int i = 0; i < val.rows; i++) {
|
||||
write(m_data + m_size, val.ptr<unsigned char>(i), val.cols);
|
||||
m_size += val.cols;
|
||||
}
|
||||
}
|
||||
void operator&(std::vector<cv::Scalar> &val) override {
|
||||
const int size = static_cast<int>(val.size());
|
||||
write(m_data + m_size, &size, sizeof(size));
|
||||
m_size += static_cast<int>(sizeof(size));
|
||||
|
||||
for (auto &element : val) {
|
||||
writeElement(element);
|
||||
}
|
||||
}
|
||||
void writeElement(const cv::Scalar &val) {
|
||||
write(m_data + m_size, val.val, sizeof(double) * 4);
|
||||
m_size += static_cast<int>(sizeof(double)) * 4;
|
||||
}
|
||||
void operator&(std::vector<double> &val) override {
|
||||
const int size = static_cast<int>(val.size());
|
||||
write(m_data + m_size, &size, sizeof(size));
|
||||
m_size += static_cast<int>(sizeof(size));
|
||||
|
||||
write(m_data + m_size, val.data(), static_cast<int>(sizeof(double)) * size);
|
||||
m_size += static_cast<int>(sizeof(double)) * size;
|
||||
}
|
||||
void operator&(std::vector<uchar> &val) override {
|
||||
const int size = static_cast<int>(val.size());
|
||||
write(m_data + m_size, &size, sizeof(size));
|
||||
m_size += static_cast<int>(sizeof(size));
|
||||
|
||||
write(m_data + m_size, val.data(), sizeof(uchar) * size);
|
||||
m_size += static_cast<int>(sizeof(uchar)) * size;
|
||||
}
|
||||
};
|
||||
|
||||
using SizeCountBuffer = OutBuffer<fakeWrite>;
|
||||
using WriteBuffer = OutBuffer<binWrite>;
|
||||
|
||||
class ReadBuffer final : public Buffer {
|
||||
public:
|
||||
explicit ReadBuffer(unsigned char *data_)
|
||||
: Buffer(0, data_) {}
|
||||
|
||||
void operator&(uchar &val) override {
|
||||
memcpy(&val, m_data + m_size, sizeof(uchar));
|
||||
m_size += static_cast<int>(sizeof(uchar));
|
||||
}
|
||||
void operator&(std::vector<cv::Mat> &val) override {
|
||||
int count = 0;
|
||||
memcpy(&count, m_data + m_size, sizeof(int));
|
||||
val.resize(count);
|
||||
m_size += static_cast<int>(sizeof(count));
|
||||
|
||||
for (auto &element : val) {
|
||||
read(element);
|
||||
}
|
||||
}
|
||||
void read(cv::Mat &val) {
|
||||
int width = 0;
|
||||
memcpy(&width, m_data + m_size, sizeof(int));
|
||||
m_size += static_cast<int>(sizeof(int));
|
||||
|
||||
int height = 0;
|
||||
memcpy(&height, m_data + m_size, sizeof(int));
|
||||
m_size += static_cast<int>(sizeof(int));
|
||||
|
||||
const int alignedWidth = static_cast<int>(cv::alignSize(width, simdSize(cv::v_uint8)));
|
||||
const auto img = cv::Mat::zeros(height, alignedWidth, CV_8UC1);
|
||||
val = img(cv::Rect(0, 0, width, height));
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
auto *ptr = val.ptr<uchar>(y);
|
||||
memcpy(ptr, m_data + m_size, width);
|
||||
m_size += width;
|
||||
}
|
||||
}
|
||||
void operator&(std::vector<cv::Scalar> &val) override {
|
||||
int count = 0;
|
||||
memcpy(&count, m_data + m_size, sizeof(int));
|
||||
val.resize(count);
|
||||
m_size += static_cast<int>(sizeof(count));
|
||||
|
||||
for (auto &element : val) {
|
||||
read(element);
|
||||
}
|
||||
}
|
||||
void read(cv::Scalar &val) {
|
||||
memcpy(val.val, m_data + m_size, sizeof(double) * 4);
|
||||
m_size += static_cast<int>(sizeof(double)) * 4;
|
||||
}
|
||||
void operator&(std::vector<double> &val) override {
|
||||
int count = 0;
|
||||
memcpy(&count, m_data + m_size, sizeof(int));
|
||||
val.resize(count);
|
||||
m_size += static_cast<int>(sizeof(count));
|
||||
|
||||
memcpy(val.data(), m_data + m_size, sizeof(double) * count);
|
||||
m_size += static_cast<int>(sizeof(double)) * count;
|
||||
}
|
||||
void operator&(std::vector<uchar> &val) override {
|
||||
int count = 0;
|
||||
memcpy(&count, m_data + m_size, sizeof(int));
|
||||
val.resize(count);
|
||||
m_size += static_cast<int>(sizeof(count));
|
||||
|
||||
memcpy(val.data(), m_data + m_size, sizeof(bool) * count);
|
||||
m_size += static_cast<int>(sizeof(uchar)) * count;
|
||||
}
|
||||
};
|
||||
|
||||
/// Apply the archive `buf` to every serialized field of `model`.
void operation(Buffer *buf, Model &model) {
    buf->operator&(model);
}
|
||||
|
||||
/**
 * @brief Serialize a model into a caller-provided buffer.
 * @param model  model to serialize
 * @param buffer destination; pass nullptr to query the required size
 * @param size   in: capacity of buffer, out: required size (query) or number
 *               of bytes written (success); set to 0 on failure
 * @return true on success, false if size or model is nullptr or the buffer is
 *         too small
 */
bool serialize(Model *const model, unsigned char *buffer, int *size) {
    if (nullptr == size) {
        return false;
    }

    if (nullptr == model) {
        *size = 0;
        return false;
    }

    // dry run: only counts bytes
    SizeCountBuffer counter(buffer);
    operation(&counter, *model);
    const int required = counter.count();

    if (nullptr == buffer) {
        *size = required;
        return true;
    }

    if (required > *size) {
        *size = 0;
        return false;
    }

    WriteBuffer writer(buffer);
    operation(&writer, *model);

    // report the written size, as the in/out contract of `size` promises
    *size = writer.count();
    return true;
}
|
||||
|
||||
/**
 * @brief Rebuild a model from a buffer produced by serialize().
 * @param buffer serialized bytes
 * @param size   buffer size in bytes
 * @return newly allocated model, or nullptr if buffer is nullptr or size < 1
 *
 * NOTE(review): `size` is only used for the emptiness check; the reader does
 * not bounds-check against it, so the buffer must be trusted and complete.
 */
Model_t deserialize(unsigned char *buffer, const int size) {
    if (size < 1 || nullptr == buffer) {
        return nullptr;
    }

    ReadBuffer reader(buffer);
    auto      *model = new Model;
    try {
        operation(&reader, *model);
    } catch (...) {
        // do not leak the half-built model if reading throws
        delete model;
        throw;
    }

    return model;
}
|
240
sum.cpp
Normal file
240
sum.cpp
Normal file
@ -0,0 +1,240 @@
|
||||
#include "sum.h"
|
||||
|
||||
#include <opencv2/core/hal/intrin.hpp>
|
||||
|
||||
// Widens the eight 16-bit lanes of `src` to 32 bits and adds the lower four
// lanes to the upper four, yielding four 32-bit partial sums.
inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) {
    return cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
}
|
||||
|
||||
// Widens the four 32-bit lanes of `src` to 64 bits and adds the lower two
// lanes to the upper two, yielding two 64-bit partial sums.
inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) {
    return cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
}
|
||||
|
||||
// Accumulates one vector of sixteen bytes into the running totals:
// `sum` gains the byte values, `sqSum` gains their squares.
inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) {
    // Plain sum: widen to 16 bits, fold the halves, then widen/fold to 32 bits.
    const cv::v_uint16x8 halvesAdded = cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
    sum = cv::v_add(sum, v_add_expand(halvesAdded));

    // Sum of squares: src . src gives 32-bit partial products, folded to 64 bits.
    const cv::v_uint32x4 squares = cv::v_dotprod_expand_fast(src, src);
    sqSum = cv::v_add(sqSum, v_add_expand(squares));
}
|
||||
|
||||
// Computes the sum and the sum of squares of all pixels of `src` covered by
// the runs in `hRegion` (each run: row, start column, length).
// Pixels are read as bytes - assumes `src` is single-channel 8-bit; TODO confirm.
void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) {
    constexpr auto lanes = simdSize(cv::v_uint8);

    // Vector accumulators take whole SIMD blocks; scalar ones take run tails.
    cv::v_uint32x4 blockSum = cv::v_setzero_u32();
    cv::v_uint64x2 blockSqSum = cv::v_setzero_u64();
    uint32_t tailSum = 0;
    uint64 tailSqSum = 0;

    const auto *base = src.data;
    for (const auto &run : hRegion) {
        const auto *line = base + src.step * run.row + run.startColumn;

        int col = 0;
        while (col < run.length - lanes) {
            computeSum(cv::v_load(line + col), blockSum, blockSqSum);
            col += lanes;
        }

        // TODO: pad the tail with zeros so it can go through the vector path too
        while (col < run.length) {
            const auto pixel = line[ col ];
            tailSum += pixel;
            tailSqSum += static_cast<ushort>(pixel) * static_cast<ushort>(pixel);
            ++col;
        }
    }

    sum = cv::v_reduce_sum(blockSum) + tailSum;
    sqSum = cv::v_reduce_sum(blockSqSum) + tailSqSum;
}
|
||||
|
||||
// Adds the signed lane-wise difference (end - start) of eight 16-bit values
// to two 32-bit accumulators: lanes 0-3 go to `diff0`, lanes 4-7 to `diff1`.
inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) {
    // Subtract as signed 16-bit values so negative differences are kept.
    const cv::v_int16x8 delta =
        cv::v_sub(cv::v_reinterpret_as_s16(end), cv::v_reinterpret_as_s16(start));

    diff0 = cv::v_add(diff0, cv::v_expand_low(delta));
    diff1 = cv::v_add(diff1, cv::v_expand_high(delta));
}
|
||||
|
||||
// Adds the signed lane-wise difference (end - start) of sixteen bytes to four
// 32-bit accumulators; `diff0`..`diff3` receive lanes 0-3, 4-7, 8-11, 12-15.
inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                           cv::v_int32x4 &diff3) {
    // Lower eight bytes.
    const cv::v_uint16x8 startLow = cv::v_expand_low(start);
    const cv::v_uint16x8 endLow = cv::v_expand_low(end);
    computeSumDiff(startLow, endLow, diff0, diff1);

    // Upper eight bytes.
    const cv::v_uint16x8 startHigh = cv::v_expand_high(start);
    const cv::v_uint16x8 endHigh = cv::v_expand_high(end);
    computeSumDiff(startHigh, endHigh, diff2, diff3);
}
|
||||
|
||||
// Adds the signed lane-wise difference (end - start) of four 32-bit values to
// the accumulator `diff0`. Inputs are reinterpreted as signed before
// subtracting.
inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
                             cv::v_int32x4 &diff0) {
    const cv::v_int32x4 delta =
        cv::v_sub(cv::v_reinterpret_as_s32(end), cv::v_reinterpret_as_s32(start));
    diff0 = cv::v_add(diff0, delta);
}
|
||||
|
||||
// Squares eight 16-bit values of `start` and `end` lane-wise and adds the
// signed difference of the squares (end^2 - start^2) to the accumulators:
// lanes 0-3 go to `diff0`, lanes 4-7 to `diff1`.
// Note: `start` and `end` are overwritten with their squares.
inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
                             cv::v_int32x4 &diff1) {
    start = cv::v_mul(start, start);
    end = cv::v_mul(end, end);

    const cv::v_uint32x4 startSqLow = cv::v_expand_low(start);
    const cv::v_uint32x4 endSqLow = cv::v_expand_low(end);
    computeSqSumDiff(startSqLow, endSqLow, diff0);

    const cv::v_uint32x4 startSqHigh = cv::v_expand_high(start);
    const cv::v_uint32x4 endSqHigh = cv::v_expand_high(end);
    computeSqSumDiff(startSqHigh, endSqHigh, diff1);
}
|
||||
|
||||
// Adds the signed lane-wise difference of squares (end^2 - start^2) of
// sixteen bytes to four 32-bit accumulators; `diff0`..`diff3` receive lanes
// 0-3, 4-7, 8-11, 12-15.
inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                             cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                             cv::v_int32x4 &diff3) {
    // The 16-bit overload squares its arguments in place, so it is handed
    // mutable local copies.
    cv::v_uint16x8 startLow = cv::v_expand_low(start);
    cv::v_uint16x8 endLow = cv::v_expand_low(end);
    computeSqSumDiff(startLow, endLow, diff0, diff1);

    cv::v_uint16x8 startHigh = cv::v_expand_high(start);
    cv::v_uint16x8 endHigh = cv::v_expand_high(end);
    computeSqSumDiff(startHigh, endHigh, diff2, diff3);
}
|
||||
|
||||
// Writes a running total into ptr[0..3]: each slot is the previous slot plus
// the matching entry of `val`, seeded from ptr[-1] (which must be readable).
inline void v_expand_store(double *ptr, const std::array<int, 4> &val) {
    double running = ptr[ -1 ];
    for (std::size_t k = 0; k < val.size(); ++k) {
        running += val[ k ];
        ptr[ k ] = running;
    }
}
|
||||
|
||||
void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
|
||||
std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) {
|
||||
constexpr auto blockSize = simdSize(cv::v_uint8);
|
||||
auto *srcPtr = src;
|
||||
auto *sumPtr = sum + row * sumStep;
|
||||
auto *sqSumPtr = sqSum + row * sqSumStep;
|
||||
|
||||
std::array<int, 4> buf{};
|
||||
|
||||
int i = 1;
|
||||
for (; i < sumWidth - blockSize; i += blockSize) {
|
||||
cv::v_int32x4 diff0 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff1 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff2 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff3 = cv::v_setzero_s32();
|
||||
|
||||
cv::v_int32x4 diff10 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff11 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff12 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff13 = cv::v_setzero_s32();
|
||||
|
||||
for (const auto &rle : hRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
|
||||
auto *endPtr = startPtr + rle.length;
|
||||
|
||||
auto vStart = cv::v_load(startPtr);
|
||||
auto vEnd = cv::v_load(endPtr);
|
||||
computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
|
||||
computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
|
||||
}
|
||||
|
||||
auto *sumPtrStart = sumPtr + i;
|
||||
cv::v_store(buf.data(), diff0);
|
||||
v_expand_store(sumPtrStart, buf);
|
||||
cv::v_store(buf.data(), diff1);
|
||||
v_expand_store(sumPtrStart + 4, buf);
|
||||
cv::v_store(buf.data(), diff2);
|
||||
v_expand_store(sumPtrStart + 8, buf);
|
||||
cv::v_store(buf.data(), diff3);
|
||||
v_expand_store(sumPtrStart + 12, buf);
|
||||
|
||||
auto *sqSumPtrStart = sqSumPtr + i;
|
||||
cv::v_store(buf.data(), diff10);
|
||||
v_expand_store(sqSumPtrStart, buf);
|
||||
cv::v_store(buf.data(), diff11);
|
||||
v_expand_store(sqSumPtrStart + 4, buf);
|
||||
cv::v_store(buf.data(), diff12);
|
||||
v_expand_store(sqSumPtrStart + 8, buf);
|
||||
cv::v_store(buf.data(), diff13);
|
||||
v_expand_store(sqSumPtrStart + 12, buf);
|
||||
}
|
||||
|
||||
for (; i < sumWidth; i++) {
|
||||
int32_t partSum = 0;
|
||||
int32_t partSqSum = 0;
|
||||
for (const auto &rle : hRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
|
||||
auto *endPtr = startPtr + rle.length;
|
||||
|
||||
const int32_t start = *startPtr;
|
||||
const int32_t end = *endPtr;
|
||||
partSum += end - start;
|
||||
partSqSum += end * end - start * start;
|
||||
}
|
||||
|
||||
auto *sumPtrStart = sumPtr + i;
|
||||
sumPtrStart[ 0 ] = sumPtrStart[ -1 ] + partSum;
|
||||
auto *sqSumPtrStart = sqSumPtr + i;
|
||||
sqSumPtrStart[ 0 ] = sqSumPtrStart[ -1 ] + partSqSum;
|
||||
}
|
||||
}
|
||||
|
||||
void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
|
||||
std::size_t sumStep, double *sqSum, std::size_t sqSumStep) {
|
||||
auto *srcPtr = src;
|
||||
auto *sumPtr = sum + row * sumStep;
|
||||
auto *sqSumPtr = sqSum + row * sqSumStep;
|
||||
|
||||
int32_t partSum = 0;
|
||||
int32_t partSqSum = 0;
|
||||
for (const auto &rle : vRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col;
|
||||
auto *endPtr = startPtr + rle.length * srcStep;
|
||||
|
||||
const int32_t start = *startPtr;
|
||||
const int32_t end = *endPtr;
|
||||
|
||||
partSum += end - start;
|
||||
partSqSum += end * end - start * start;
|
||||
}
|
||||
|
||||
sumPtr[ 0 ] = *(sumPtr - sumStep) + partSum;
|
||||
sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
|
||||
}
|
||||
|
||||
// Builds two CV_64FC1 tables of size (src.size() - templateSize + 1): for
// every placement of the region inside `src`, `sum` holds the sum of the
// covered pixels and `sqSum` the sum of their squares.
//
// The top-left entry is computed directly from `hRegion`; every other entry
// is derived incrementally: `shiftH` extends a row to the right and `shiftV`
// seeds column 0 of the next row from the row above.
//
// Pixels are read as bytes - assumes `src` is single-channel 8-bit; TODO confirm.
// If the resulting table has no elements (e.g. the template is one pixel
// wider or taller than `src`), the outputs are left empty and nothing is
// computed.
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
                 const HRegion &hRegion, const VRegion &vRegion) {
    const auto size = src.size() - templateSize + cv::Size(1, 1);
    sum.create(size, CV_64FC1);
    sqSum.create(size, CV_64FC1);

    // A zero-area result has no storage: writing the seed value below would
    // dereference a null pointer and the region would not fit inside `src`.
    if (sum.empty() || sqSum.empty()) {
        return;
    }

    const auto *srcPtr = src.data;
    auto *sumPtr = reinterpret_cast<double *>(sum.data);
    auto *sqSumPtr = reinterpret_cast<double *>(sqSum.data);
    const auto sumStep = sum.step1();
    const auto sqSumStep = sqSum.step1();

    // Seed: the top-left entry is computed in full.
    uint64 sum0 = 0;
    uint64 sqSum0 = 0;
    computeSum(src, hRegion, sum0, sqSum0);
    sumPtr[ 0 ] = static_cast<double>(sum0);
    sqSumPtr[ 0 ] = static_cast<double>(sqSum0);

    for (int y = 0; y < size.height; y++) {
        // Complete the current row from its first column ...
        shiftH(srcPtr, src.step, hRegion, y, sumPtr, sumStep, sum.cols, sqSumPtr, sqSumStep);

        // ... then seed the first column of the next row, if there is one.
        if (y + 1 < size.height) {
            shiftV(srcPtr, src.step, vRegion, y + 1, sumPtr, sumStep, sqSumPtr, sqSumStep);
        }
    }
}
|
Reference in New Issue
Block a user