70 Commits

Author SHA1 Message Date
64298e45eb static check 2025-05-08 17:56:22 +08:00
93bef37422 fix: mismatch faction sign 2025-03-27 13:43:37 +08:00
58cdd2facf add cmd 2025-03-25 17:56:02 +08:00
a756fc1bf2 unroll 2025-03-10 09:10:32 +08:00
0f2391b255 omp optim 2025-03-02 12:47:11 +08:00
8448b07e17 update readme 2025-02-27 21:04:58 +08:00
b2652072ef misc 2025-02-26 17:35:52 +08:00
972dcf7cb3 small fix 2025-02-22 17:21:42 +08:00
d0b8a6f3a2 refactor intrinsics api 2025-02-10 21:33:38 +08:00
80e63e7637 set valid opencv version 2025-01-21 21:02:02 +08:00
8a975e19c9 add todo 2025-01-21 20:48:01 +08:00
45971ed070 update readme 2024-12-27 22:05:26 +08:00
094ada9337 misc 2024-12-23 09:05:27 +08:00
d07f070668 add pos subpixel 2024-12-22 21:01:38 +08:00
a3b1b64e9a fix: image size 2024-12-01 17:57:15 +08:00
d1e4abfc71 misc 2024-11-03 21:12:39 +08:00
cff37c2f08 fix: invalid size write 2024-11-03 20:51:33 +08:00
440c938b22 last miss fix 2024-10-08 21:08:15 +08:00
b6e2d9aba5 unified simd size 2024-10-08 20:39:30 +08:00
4a2f97e354 fix arm64 2024-09-27 21:11:19 +08:00
4b8ec4359c optimal cache miss 2024-09-22 17:20:24 +08:00
d451b46535 misc 2024-09-21 09:16:15 +08:00
09017f0bc0 let integral as standalone 2024-09-20 22:50:25 +08:00
eea1b6320b spilt vector and scalar code 2024-09-20 22:42:46 +08:00
b4cdaff8d1 misc 2024-09-20 17:55:29 +08:00
6109ff388a update README.md 2024-09-19 22:47:25 +08:00
98acd6efbd misc 2024-09-19 15:50:57 +08:00
92b09a51c7 misc 2024-09-19 11:28:20 +08:00
7f5f665af2 misc 2024-09-19 11:20:33 +08:00
44cf2a0526 fix integral 2024-09-19 10:32:26 +08:00
df8267b60d simd integral image 2024-09-18 22:31:51 +08:00
01a20a18f1 static check 2024-09-18 19:53:27 +08:00
087ab9bc17 misc 2024-09-18 18:10:35 +08:00
d7bf2ad384 no copy 2024-09-18 15:45:03 +08:00
2c20dc2006 add openmp support 2024-09-18 11:21:33 +08:00
57ae2c8704 update doc 2024-09-13 20:54:55 +08:00
8e7227f74b demo: match start top level 2024-09-13 20:39:50 +08:00
c72b709178 add LICENSE 2024-09-13 20:28:04 +08:00
b5523f8491 replace call cv::Mat::ptr 2024-09-13 17:55:58 +08:00
f43f78f45f add readme 2024-09-12 21:07:31 +08:00
15372e0b60 add sample image 2024-09-12 21:06:50 +08:00
3d315445ab remove unused 2024-09-12 19:58:50 +08:00
d03818a63c special optim for arm 2024-09-12 08:36:55 +08:00
ed14c0e6f4 product hand write 2024-09-11 22:15:47 +08:00
fa20b52b85 serialize to file 2024-09-11 17:53:31 +08:00
7a88dad291 misc 2024-09-11 17:52:38 +08:00
dd5de6f6f1 misc 2024-09-11 14:30:31 +08:00
8050b8612a 🎈 perf(match): less reduce sum 2024-09-11 10:21:56 +08:00
3a689e900f revert 2024-09-06 21:19:58 +08:00
b8918b123d static check 2024-09-06 14:35:08 +08:00
1c67371cc9 fix type 2024-09-05 13:57:41 +08:00
422c12d691 impl shift 2024-09-05 11:21:55 +08:00
bece4f138f misc 2024-09-04 18:14:09 +08:00
75098c811c remove unused 2024-09-04 14:57:09 +08:00
a52deadc30 misc 2024-09-04 09:15:30 +08:00
ef1556c538 build config 2024-09-04 09:12:58 +08:00
92160ee1d9 misc 2024-09-04 09:06:42 +08:00
29fb7b001c static check 2024-09-03 22:53:25 +08:00
d85d4f7443 misc 2024-09-03 18:05:13 +08:00
13cbf06f46 aligned model at training 2024-09-03 17:03:42 +08:00
a5c8049af1 aligned model at deserialization 2024-09-03 15:27:48 +08:00
371b926fb0 cleanr name 2024-09-02 17:57:10 +08:00
3409395778 misc 2024-09-01 17:54:02 +08:00
36392b3e34 format check 2024-08-31 18:21:00 +08:00
726a55725a fix doc 2024-08-30 22:00:56 +08:00
338c1dcf96 static check 2024-08-30 21:53:42 +08:00
6dd78fbe21 support rect roi 2024-08-30 20:57:12 +08:00
665559d8ca export api 2024-08-29 15:20:48 +08:00
8758d7801d misc 2024-08-27 10:36:22 +08:00
fc4f04b9be add loongarch simd(lsx) 2024-08-25 20:59:55 +08:00
22 changed files with 1286 additions and 767 deletions

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.12)
project(match)
find_package(OpenCV REQUIRED)
project(match)
find_package(OpenCV 4.8 REQUIRED)
option(ENABLE_OPENMP "enable openmp" OFF)
if(ENABLE_OPENMP)
@ -21,24 +21,28 @@ endif(ENABLE_OPENMP)
add_library(algo SHARED
grayMatch.h
grayMatch.cpp
sum.h
sum.cpp
serialize.cpp
privateType.h
apiExport.h
integral.h
integral.cpp
)
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
target_compile_options(algo PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wl,--exclude-libs,ALL -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
#$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Darwin>>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
)
target_compile_definitions(algo PRIVATE API_EXPORTS
target_compile_definitions(algo PUBLIC API_EXPORTS
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
)
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
#==============================================================
#exe
#==============================================================
@ -54,5 +58,3 @@ target_compile_options(${PROJECT_NAME} PRIVATE
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
)
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")

24
LICENSE Normal file
View File

@ -0,0 +1,24 @@
BSD 2-Clause License
Copyright (c) 2024, SurfaceMan
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,13 +1,20 @@
# Template match with gray model(ncc)
## rotate-model vs main branch
## Note: branch [feature-rotate-model](https://github.com/SurfaceMan/gray_match/tree/feature-rotate-model) method 2x faster at matching!
## highlights:
1. The original code is based on [Fastest_Image_Pattern_Matching](https://github.com/DennisLiu1993/Fastest_Image_Pattern_Matching); check out tag [v1.0](https://github.com/SurfaceMan/gray_match/releases/tag/v1.0) for more details.
2. Refactored the SIMD match process with OpenCV [Universal intrinsics](https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html); it has been tested on x86_64 (SSE), ARM (NEON) and LoongArch (LSX).
3. support model save/load as binary file
4. provide pure c interface
5. support openmp
6. position with 3x3 subpixel interpolation
## usage:
all you need can be found in [main.cpp](main.cpp)
## gallery:
![sample](img/result.png)
| method | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
|-----------|----------|------------------|---------------------------|
| train | 1 | 680 | 1/680 |
| match | 31 | 16 | 2 |
| train-omp | 1 | 160 | 1/160 |
| match-omp | 12 | 6 | 2 |
result: the main branch method trains the model really fast (1 ms) and is parameter-free, while the rotate-model method is 2x faster at matching.

1
TODO.md Normal file
View File

@ -0,0 +1 @@
1. The OpenCV intrinsics API [changed](https://github.com/opencv/opencv/pull/24371) since 4.9; the code needs refactoring.

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,12 @@
#ifndef GRAY_MATCH_H
#define GRAY_MATCH_H
#include <opencv2/opencv.hpp>
#include "apiExport.h"
struct Model;
using Model_t = Model *;
struct Pose {
float x;
float y;
@ -14,11 +14,93 @@ struct Pose {
float score;
};
API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
double angleStep);
/**
* @brief train match model
* @param data image data
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
* @param roiHeight rectangle roi height
* @param levelNum pyramid levels (> 0:user setting,-1:auto)
* @return
*/
API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height, int channels,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth,
int roiHeight, int levelNum);
/**
* @brief match model
* @param data image data
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
* @param roiHeight rectangle roi height
* @param model trained model
* @param count in(max detect count)/out(found count)
* @param poses pose array allocated by the caller with at least count elements
* @param level match start at which level (level>=0 && level<modelLevel-1,-1:auto)
* @param startAngle rotation start angle
* @param spanAngle rotation angle range
* @param maxOverlap overlap threshold
* @param minScore minimum matched score
* @param subpixel compute subpixel result
* @return
*/
API_PUBLIC void matchModel(const unsigned char *data, int width, int height, int channels,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth, int roiHeight,
Model_t model, int *count, Pose *poses, int level, double startAngle,
double spanAngle, double maxOverlap, double minScore, int subpixel);
API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
int level, double startAngle, double spanAngle, double maxOverlap,
double minScore, int subpixel);
/**
* @brief get trained model levels
* @param model
* @return pyramid level
*/
API_PUBLIC int modelLevel(Model_t model);
#endif // GRAY_MATCH_H
/**
* @brief get trained model image
* @param model
* @param level pyramid level index(level>=0 && level<modelLevel-1)
* @param data image data buffer(need allocated), can input nullptr to query width/height/channels
* @param length buffer length not less than width*height*channels
* @param width image width, can input nullptr
* @param height image height, can input nullptr
* @param channels image channels, can input nullptr
* @return
*/
API_PUBLIC void modelImage(Model_t model, int level, unsigned char *data, int length, int *width,
int *height, int *channels);
/**
* @brief free model
* @param model
* @return
*/
API_PUBLIC void freeModel(Model_t *model);
/**
* @brief serialize model to buffer
* @param model
* @param buffer need allocated, can input nullptr to query size
* @param size in(buffer size)/out(written size)
* @return true(success)false(failed)
*/
API_PUBLIC bool serialize(Model_t model, unsigned char *buffer, int *size);
/**
* @brief deserialize model
* @param buffer
* @param size buffer size
* @return model
*/
API_PUBLIC Model_t deserialize(unsigned char *buffer, int size);
#endif // GRAY_MATCH_H

BIN
img/3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

BIN
img/h.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/i.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/j.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/k.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/l.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 336 KiB

BIN
img/result.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 374 KiB

180
integral.cpp Normal file
View File

@ -0,0 +1,180 @@
#include "integral.h"
#include "privateType.h"
#include <opencv2/core/hal/intrin.hpp>
/**
 * @brief Convert the 32-bit integer lanes of src to doubles.
 * @param src  signed 32-bit integer vector
 * @param low  receives the result of cv::v_cvt_f64(src)
 * @param high receives the result of cv::v_cvt_f64_high(src)
 */
inline void expand(const cv::v_int32 &src, cv::v_float64 &low, cv::v_float64 &high) {
low = cv::v_cvt_f64(src);
high = cv::v_cvt_f64_high(src);
}
/**
 * @brief Produce one vector's worth of integral-image values for the plain sum.
 * @param src     pixel values already widened to 16 bit
 * @param dst     output position in the current integral row
 * @param prevDst same column position in the integral row above
 * @param pre     in/out: running total of the current row, broadcast to every lane
 */
inline void integralSum(const cv::v_uint16 &src, double *dst, const double *prevDst,
cv::v_uint32 &pre) {
// Shift-and-add with lane offsets 1, 2 and 4.
// NOTE(review): this yields a full inclusive prefix sum only for 8-lane (128-bit) vectors and
// presumes v_rotate_left fills vacated lanes with zero - confirm before enabling wider SIMD.
auto sum = cv::v_add(src, cv::v_rotate_left<1>(src));
sum = cv::v_add(sum, cv::v_rotate_left<2>(sum));
sum = cv::v_add(sum, cv::v_rotate_left<4>(sum));
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(sum, v1, v2);
// Add the total carried over from the pixels to the left of this vector.
v1 = cv::v_add(v1, pre);
v2 = cv::v_add(v2, pre);
// The last lane of v2 becomes the carry for the next vector of the row.
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
cv::v_float64 v3;
cv::v_float64 v4;
// NOTE(review): the unsigned totals are reinterpreted as signed before the double conversion,
// so a row total of 2^31 or more would come out negative.
expand(cv::v_reinterpret_as_s32(v1), v3, v4);
// Each store adds the value from the row above and writes one v_float64 worth of results.
cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
cv::v_store(dst + simdSize(cv::v_float64),
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
expand(cv::v_reinterpret_as_s32(v2), v3, v4);
cv::v_store(dst + simdSize(cv::v_float64) * 2,
cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
cv::v_store(dst + simdSize(cv::v_float64) * 3,
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
}
/**
 * @brief Produce one vector's worth of integral-image values for the squared sum.
 *
 * Unlike integralSum, the additions are done after widening to 32 bit.
 *
 * @param src     16-bit lanes to accumulate (the two-vector overload passes squared pixels)
 * @param dst     output position in the current integral row
 * @param prevDst same column position in the integral row above
 * @param pre     in/out: running total of the current row, broadcast to every lane
 */
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(src, v1, v2);
{
// Rotate in 16 bit (no addition there), widen, then add: each lane now holds itself plus its
// left neighbour.
auto shift1 = cv::v_rotate_left<1>(src);
cv::v_uint32 v3;
cv::v_uint32 v4;
cv::v_expand(shift1, v3, v4);
v1 = cv::v_add(v1, v3);
v2 = cv::v_add(v2, v4);
// v4 takes lanes from the v1/v2 pair starting at offset 2; adding it extends the upper half
// by two more lanes to the left.
v4 = cv::v_extract<2>(v1, v2);
v2 = cv::v_add(v2, v4);
// Offset-2 step for the lower half, then add the carry from earlier vectors.
v3 = cv::v_rotate_left<2>(v1);
v1 = cv::v_add(v1, v3);
v1 = cv::v_add(v1, pre);
// Adding the finished lower half (carry included) completes the upper half.
// NOTE(review): like integralSum this presumes 4-lane v_uint32 (128-bit SIMD) - confirm.
v2 = cv::v_add(v2, v1);
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
}
cv::v_float64 v3;
cv::v_float64 v4;
// NOTE(review): totals are reinterpreted as signed 32 bit before the double conversion, so a
// row total of 2^31 or more would come out negative.
expand(cv::v_reinterpret_as_s32(v1), v3, v4);
cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
cv::v_store(dst + simdSize(cv::v_float64),
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
expand(cv::v_reinterpret_as_s32(v2), v3, v4);
cv::v_store(dst + simdSize(cv::v_float64) * 2,
cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
cv::v_store(dst + simdSize(cv::v_float64) * 3,
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
}
/*
inline void integralSqSum(cv::v_uint32 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
src += cv::v_rotate_left<1>(src);
src += cv::v_rotate_left<2>(src);
src += pre;
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(src));
cv::v_float64 v1;
cv::v_float64 v2;
expand(cv::v_reinterpret_as_s32(src), v1, v2);
cv::v_store(dst, v1 + cv::v_load(prevDst));
cv::v_store(dst + simdSize(cv::v_float64), v2 + cv::v_load(prevDst +
simdSize(cv::v_float64)));
}
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(src, v1, v2);
integralSqSum(v1, dst, prevDst, pre);
integralSqSum(v2, dst + simdSize(cv::v_uint32), prevDst + simdSize(cv::v_uint32),
pre);
}
*/
/**
 * @brief Run the single-vector integralSum on two consecutive 16-bit vectors.
 *
 * The second call is offset by simdSize(cv::v_uint16) doubles in both dst and prevDst; pre is
 * threaded through both calls.
 */
inline void integralSum(const cv::v_uint16 &v1, const cv::v_uint16 &v2, double *dst,
const double *prevDst, cv::v_uint32 &pre) {
integralSum(v1, dst, prevDst, pre);
integralSum(v2, dst + simdSize(cv::v_uint16), prevDst + simdSize(cv::v_uint16), pre);
}
/**
 * @brief Square two consecutive 16-bit vectors in place, then run the single-vector
 *        integralSqSum on each.
 *
 * v1 and v2 are overwritten with their lane-wise squares. The second call is offset by
 * simdSize(cv::v_uint16) doubles in both dst and prevDst; pre is threaded through both calls.
 */
inline void integralSqSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
cv::v_uint32 &pre) {
// The caller in this file passes widened 8-bit pixels, whose squares (at most 255 * 255) fit
// in 16 bit.
v1 = cv::v_mul_wrap(v1, v1);
v2 = cv::v_mul_wrap(v2, v2);
integralSqSum(v1, dst, prevDst, pre);
integralSqSum(v2, dst + simdSize(cv::v_uint16), prevDst + simdSize(cv::v_uint16), pre);
}
/**
 * @brief Compute the integral image and the squared integral image of src.
 *
 * Both outputs are (re)created as CV_64FC1 with one extra row and column compared to src. The
 * first row is cleared with memset and the first column is cleared row by row, so element
 * (y + 1, x + 1) holds the sum over src rows 0..y and columns 0..x.
 *
 * @param src   input image; read as raw bytes, one byte per pixel
 *              (assumes CV_8UC1 - TODO confirm, the type is not checked here)
 * @param sum   output integral image
 * @param sqSum output integral image of squared pixel values
 */
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum) {
const auto size = src.size() + cv::Size(1, 1);
sum.create(size, CV_64FC1);
sqSum.create(size, CV_64FC1);
// create() does not initialise memory: zero the first row of each output here.
memset(sum.data, 0, sum.step[ 0 ]);
memset(sqSum.data, 0, sqSum.step[ 0 ]);
const auto *srcStart = src.data;
const auto srcStep = src.step[ 0 ];
// Both start pointers address element (1, 1); row strides are counted in doubles.
auto *sumStart = reinterpret_cast<double *>(sum.data) + sum.step1() + 1;
const auto sumStep = sum.step[ 0 ] / sum.step[ 1 ];
auto *sqSumStart = reinterpret_cast<double *>(sqSum.data) + sqSum.step1() + 1;
const auto sqSumStep = sqSum.step[ 0 ] / sqSum.step[ 1 ];
// size.width is src.cols + 1, so "x < end" holds exactly while a full vector of
// simdSize(cv::v_uint8) pixels still fits inside the source row.
const auto end = size.width - simdSize(cv::v_uint8);
// Pass 1: vectorised plain sum, one source row at a time.
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sumPtr = sumStart + sumStep * y;
const auto *preSumPtr = sumStart + sumStep * (y - 1);
// Zero the first-column element of this output row.
sumPtr[ -1 ] = 0;
cv::v_uint32 prevSum = cv::vx_setzero_u32();
for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
cv::v_uint16 v1;
cv::v_uint16 v2;
cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
integralSum(v1, v2, sumPtr + x, preSumPtr + x, prevSum);
}
}
// Pass 2: vectorised squared sum, done as a separate sweep over the source.
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sqSumPtr = sqSumStart + sqSumStep * y;
auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
sqSumPtr[ -1 ] = 0;
cv::v_uint32 prevSqSum = cv::vx_setzero_u32();
for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
cv::v_uint16 v1;
cv::v_uint16 v2;
cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
integralSqSum(v1, v2, sqSumPtr + x, preSqSumPtr + x, prevSqSum);
}
}
// Pass 3: scalar tail for the columns left over after the last full vector.
const auto start = src.cols - src.cols % simdSize(cv::v_uint8);
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sumPtr = sumStart + sumStep * y;
auto *sqSumPtr = sqSumStart + sqSumStep * y;
const auto *preSumPtr = sumStart + sumStep * (y - 1);
const auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
for (int x = start; x < src.cols; x++) {
const auto val = srcPtr[ x ];
const auto sqVal = val * val;
// Standard integral recurrence: left + value + above - above-left.
sumPtr[ x ] = sumPtr[ x - 1 ] + val + preSumPtr[ x ] - preSumPtr[ x - 1 ];
sqSumPtr[ x ] = sqSumPtr[ x - 1 ] + sqVal + preSqSumPtr[ x ] - preSqSumPtr[ x - 1 ];
}
}
}

8
integral.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef INTEGRAL_H
#define INTEGRAL_H
#include <opencv2/opencv.hpp>
/**
 * @brief compute the integral image and the squared integral image of src
 * @param src input image
 * @param sum output integral image
 * @param sqSum output integral image of squared pixel values
 */
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum);
#endif // INTEGRAL_H

View File

@ -1,9 +1,9 @@
#include "grayMatch.h"
#include <fstream>
#include <iostream>
#include <opencv2/core/utility.hpp>
#include <opencv2/opencv.hpp>
#include <string>
int main(int argc, const char *argv[]) {
const std::string keys = "{model m || model image}"
@ -24,8 +24,8 @@ int main(int argc, const char *argv[]) {
return 0;
}
auto srcFile = std::string(IMG_DIR) + "/model3.png";
auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
auto srcFile = std::string(IMG_DIR) + "/3.bmp";
auto dstFile = std::string(IMG_DIR) + "/h.bmp";
if (cmd.has("model"))
srcFile = cmd.get<std::string>("model");
if (cmd.has("scene"))
@ -37,19 +37,66 @@ int main(int argc, const char *argv[]) {
return -1;
}
const std::string modelName("model.bin");
{
auto t0 = cv::getTickCount();
auto model = trainModel(src.data, src.cols, src.rows, src.channels(),
static_cast<int>(src.step), 0, 0, src.cols, src.rows, -1);
auto t1 = cv::getTickCount();
// get size
int size;
serialize(model, nullptr, &size);
// serialize to buffer
std::vector<uchar> buffer(size);
serialize(model, buffer.data(), &size);
// write to file
std::ofstream ofs(modelName, std::ios::binary | std::ios::out);
if (!ofs.is_open()) {
return -1;
}
ofs.write(reinterpret_cast<const char *>(buffer.data()), size);
freeModel(&model);
auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << std::endl;
}
int count = 70;
std::vector<Pose> poses(count);
Model_t model;
auto score = cmd.get<float>("threshold");
{
// open file
std::ifstream ifs(modelName, std::ios::binary | std::ios::in);
if (!ifs.is_open()) {
return -2;
}
auto t0 = cv::getTickCount();
auto model = trainModel(src, -1, 0, 360, -1);
auto t1 = cv::getTickCount();
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
auto t2 = cv::getTickCount();
// get size
ifs.seekg(0, std::ios::end);
auto size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
// read to buffer
std::vector<uchar> buffer(size);
ifs.read(reinterpret_cast<char *>(buffer.data()), size);
// deserialize from buffer
model = deserialize(buffer.data(), static_cast<int>(buffer.size()));
auto t2 = cv::getTickCount();
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0, 0,
dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
auto t3 = cv::getTickCount();
auto matchCost = static_cast<double>(t3 - t2) / cv::getTickFrequency();
std::cout << "match(s):" << matchCost << std::endl;
}
const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
for (int i = 0; i < count; i++) {
const auto &pose = poses[ i ];
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
@ -60,7 +107,8 @@ int main(int argc, const char *argv[]) {
auto start = cv::getTickCount();
for (int i = 0; i < times; i++) {
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0,
0, dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
count = 70;
}
auto end = cv::getTickCount();

View File

@ -2,49 +2,40 @@
#include <opencv2/core.hpp>
struct HRLE {
int row = -1;
int startColumn = -1;
int length = 0;
};
struct VRLE {
int col = -1;
int startRow = -1;
int length = 0;
};
using HRegion = std::vector<HRLE>;
using VRegion = std::vector<VRLE>;
struct Template {
cv::Mat img;
HRegion hRegion;
VRegion vRegion;
cv::RotatedRect rect;
double mean = 0;
double normal = 0;
double invArea = 0;
};
struct Layer {
double angleStep = 0;
std::vector<Template> templates;
};
struct Model {
double startAngle = 0;
double stopAngle = 0;
double angleStep = 0;
std::vector<cv::Mat> pyramids;
std::vector<cv::Scalar> mean;
std::vector<double> normal;
std::vector<double> invArea;
std::vector<uchar> equal1;
uchar borderColor = 0;
cv::Size srcSize;
std::vector<Layer> layers;
void clear() {
pyramids.clear();
normal.clear();
invArea.clear();
mean.clear();
equal1.clear();
}
void resize(const std::size_t size) {
normal.resize(size);
invArea.resize(size);
mean.resize(size);
equal1.resize(size);
}
void reserve(const std::size_t size) {
pyramids.reserve(size);
normal.reserve(size);
invArea.reserve(size);
mean.reserve(size);
equal1.reserve(size);
}
};
// Lane count of an OpenCV universal-intrinsic vector type.
// The cv::VTraits form is selected for OpenCV 4.8 and every later release. The version test
// compares (major, minor) as a pair: a plain "MAJOR >= 4 && MINOR >= 8" would fall back to the
// legacy form for releases such as 5.0, whose minor number is below 8.
#if (CV_VERSION_MAJOR > 4) || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR >= 8)
#define simdSize(type) cv::VTraits<type>::nlanes
#else
#define simdSize(type) type::nlanes
#endif
#endif

228
serialize.cpp Normal file
View File

@ -0,0 +1,228 @@
#include "grayMatch.h"
#include "privateType.h"

#include <cstring>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

#include <opencv2/core/hal/intrin.hpp>
/**
 * @brief Common interface of the model (de)serialization passes.
 *
 * A derived class defines what "operator&" does with each field type; the Model overload
 * fixes the order in which the fields are visited. m_size is the byte offset reached so far
 * and is what count() reports.
 */
class Buffer {
public:
    Buffer(const int size_, unsigned char *data_)
        : m_size(size_)
        , m_data(data_) {}

    virtual ~Buffer() = default;

    virtual void operator&(uchar &val)                   = 0;
    virtual void operator&(std::vector<cv::Mat> &val)    = 0;
    virtual void operator&(std::vector<cv::Scalar> &val) = 0;
    virtual void operator&(std::vector<double> &val)     = 0;
    virtual void operator&(std::vector<uchar> &val)      = 0;

    /// Visit every serialized field of the model, in stream order.
    void operator&(Model &val) {
        Buffer &self = *this;
        self & val.pyramids;
        self & val.mean;
        self & val.normal;
        self & val.invArea;
        self & val.equal1;
        self & val.borderColor;
    }

    /// Number of bytes processed so far.
    [[nodiscard]] int count() const {
        return m_size;
    }

protected:
    int            m_size = 0;
    unsigned char *m_data = nullptr;
};
/**
 * @brief Copy size bytes from src to dst.
 *
 * A non-positive size is a no-op. This keeps empty containers (whose data() pointer may be
 * null) away from memcpy and stops a negative size from being converted into a huge unsigned
 * byte count.
 *
 * @param dst  destination, must have room for size bytes when size > 0
 * @param src  source, must provide size readable bytes when size > 0
 * @param size number of bytes to copy
 */
void binWrite(void *const dst, const void *src, const int size) {
    if (size <= 0) {
        return;
    }

    memcpy(dst, src, static_cast<std::size_t>(size));
}
/**
 * @brief Writer that discards its input.
 *
 * Has the same signature as binWrite so it can be used wherever a Write function is expected
 * without touching any memory.
 */
void fakeWrite(void *const /*dst*/, const void * /*src*/, const int /*size*/) {}
using Write = void (*)(void *, const void *, int);
/**
 * @brief Serializing Buffer whose byte sink is the compile-time function write.
 *
 * Every field is emitted through write at the current offset and the offset is then advanced
 * by the field's byte size, so count() ends up as the total serialized size.
 *
 * Stream layout per field type:
 *  - uchar:              1 byte
 *  - vector<cv::Mat>:    int count, then per image: int cols, int rows, rows * cols bytes
 *  - vector<cv::Scalar>: int count, then 4 doubles per element
 *  - vector<double>:     int count, then count doubles
 *  - vector<uchar>:      int count, then count bytes
 */
template <Write write> class OutBuffer final : public Buffer {
public:
    explicit OutBuffer(unsigned char *const data_)
        : Buffer(0, data_) {}

    void operator&(uchar &val) override {
        append(&val, static_cast<int>(sizeof(val)));
    }

    void operator&(std::vector<cv::Mat> &val) override {
        appendCount(val.size());
        for (auto &image : val) {
            writeElement(image);
        }
    }

    /// Emit one image: its dimensions followed by the pixel rows (one byte per pixel is copied).
    void writeElement(cv::Mat &val) {
        append(&val.cols, static_cast<int>(sizeof(int)));
        append(&val.rows, static_cast<int>(sizeof(int)));
        for (int row = 0; row < val.rows; row++) {
            // Row by row, so any padding between rows is not written.
            append(val.ptr<unsigned char>(row), val.cols);
        }
    }

    void operator&(std::vector<cv::Scalar> &val) override {
        appendCount(val.size());
        for (auto &scalar : val) {
            writeElement(scalar);
        }
    }

    /// Emit the four double components of one scalar.
    void writeElement(const cv::Scalar &val) {
        append(val.val, static_cast<int>(sizeof(double)) * 4);
    }

    void operator&(std::vector<double> &val) override {
        const int length = appendCount(val.size());
        append(val.data(), static_cast<int>(sizeof(double)) * length);
    }

    void operator&(std::vector<uchar> &val) override {
        const int length = appendCount(val.size());
        append(val.data(), static_cast<int>(sizeof(uchar)) * length);
    }

private:
    /// Hand bytes to the sink at the current offset, then advance the offset.
    void append(const void *src, const int bytes) {
        write(m_data + m_size, src, bytes);
        m_size += bytes;
    }

    /// Emit a container's element count as an int and return it.
    int appendCount(const std::size_t elements) {
        const int length = static_cast<int>(elements);
        append(&length, static_cast<int>(sizeof(length)));
        return length;
    }
};
using SizeCountBuffer = OutBuffer<fakeWrite>;
using WriteBuffer = OutBuffer<binWrite>;
class ReadBuffer final : public Buffer {
public:
explicit ReadBuffer(unsigned char *data_)
: Buffer(0, data_) {}
void operator&(uchar &val) override {
memcpy(&val, m_data + m_size, sizeof(uchar));
m_size += static_cast<int>(sizeof(uchar));
}
void operator&(std::vector<cv::Mat> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
for (auto &element : val) {
read(element);
}
}
void read(cv::Mat &val) {
int width = 0;
memcpy(&width, m_data + m_size, sizeof(int));
m_size += static_cast<int>(sizeof(int));
int height = 0;
memcpy(&height, m_data + m_size, sizeof(int));
m_size += static_cast<int>(sizeof(int));
const int alignedWidth = static_cast<int>(cv::alignSize(width, simdSize(cv::v_uint8)));
const auto img = cv::Mat::zeros(height, alignedWidth, CV_8UC1);
val = img(cv::Rect(0, 0, width, height));
for (int y = 0; y < height; y++) {
auto *ptr = val.ptr<uchar>(y);
memcpy(ptr, m_data + m_size, width);
m_size += width;
}
}
void operator&(std::vector<cv::Scalar> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
for (auto &element : val) {
read(element);
}
}
void read(cv::Scalar &val) {
memcpy(val.val, m_data + m_size, sizeof(double) * 4);
m_size += static_cast<int>(sizeof(double)) * 4;
}
void operator&(std::vector<double> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
memcpy(val.data(), m_data + m_size, sizeof(double) * count);
m_size += static_cast<int>(sizeof(double)) * count;
}
void operator&(std::vector<uchar> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
memcpy(val.data(), m_data + m_size, sizeof(bool) * count);
m_size += static_cast<int>(sizeof(uchar)) * count;
}
};
void operation(Buffer *buf, Model &model) {
*buf &model;
}
bool serialize(Model *const model, unsigned char *buffer, int *size) {
if (nullptr == size) {
return false;
}
if (nullptr == model) {
*size = 0;
return false;
}
SizeCountBuffer counter(buffer);
operation(&counter, *model);
if (nullptr == buffer) {
*size = counter.count();
return true;
}
if (counter.count() > *size) {
*size = 0;
return false;
}
WriteBuffer writer(buffer);
operation(&writer, *model);
return true;
}
Model_t deserialize(unsigned char *buffer, const int size) {
if (size < 1 || nullptr == buffer) {
return nullptr;
}
ReadBuffer reader(buffer);
auto *model = new Model;
operation(&reader, *model);
return model;
}

240
sum.cpp
View File

@ -1,240 +0,0 @@
#include "sum.h"
#include <opencv2/core/hal/intrin.hpp>
// Widen the eight 16-bit lanes of src into two 32-bit vectors and return their lane-wise sum.
inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) {
cv::v_uint32x4 low;
cv::v_uint32x4 high;
cv::v_expand(src, low, high);
return cv::v_add(low, high);
}
// Widen the four 32-bit lanes of src into two 64-bit vectors and return their lane-wise sum.
inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) {
cv::v_uint64x2 low;
cv::v_uint64x2 high;
cv::v_expand(src, low, high);
return cv::v_add(low, high);
}
// Accumulate one vector of 16 pixels into the running lane-wise totals:
// sum gets the pixel values, sqSum gets their squares (via a dot product of src with itself).
// The lanes are only partial totals; the caller reduces them to scalars.
inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) {
cv::v_uint16x8 low;
cv::v_uint16x8 high;
cv::v_expand(src, low, high);
sum = cv::v_add(sum, v_add_expand(cv::v_add(low, high)));
const auto dot = cv::v_dotprod_expand_fast(src, src);
sqSum = cv::v_add(sqSum, v_add_expand(dot));
}
// Sum and squared sum of the pixels of src covered by the horizontal runs in hRegion.
// Each run is processed in SIMD blocks; whatever is left of the run is added with scalar code.
// src is read as one byte per pixel (assumes CV_8UC1 - TODO confirm, not checked here).
void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) {
constexpr auto blockSize = simdSize(cv::v_uint8);
const auto *srcPtr = src.data;
cv::v_uint32x4 vSum = cv::v_setzero_u32();
cv::v_uint64x2 vSqSum = cv::v_setzero_u64();
uint32_t partSum = 0;
uint64 partSqSum = 0;
for (const auto &rle : hRegion) {
const auto *ptr = srcPtr + src.step * rle.row + rle.startColumn;
int i = 0;
// Strict "<": a run whose remaining length equals blockSize is left to the scalar loop.
for (; i < rle.length - blockSize; i += blockSize) {
computeSum(cv::v_load(ptr + i), vSum, vSqSum);
}
// TODO aligned fill 0
for (; i < rle.length; i++) {
const auto val = ptr[ i ];
partSum += val;
partSqSum += static_cast<ushort>(val) * static_cast<ushort>(val);
}
}
// Combine the vector lanes with the scalar remainders.
sum = cv::v_reduce_sum(vSum) + partSum;
sqSum = cv::v_reduce_sum(vSqSum) + partSqSum;
}
// Add the lane-wise difference (end - start) to the accumulators: the lower four lanes go to
// diff0, the upper four to diff1. The subtraction is done in signed 16 bit.
inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) {
cv::v_int16x8 sub;
{
const auto vStart = cv::v_reinterpret_as_s16(start);
const auto vEnd = cv::v_reinterpret_as_s16(end);
sub = cv::v_sub(vEnd, vStart);
}
cv::v_int32x4 val = cv::v_expand_low(sub);
diff0 = cv::v_add(diff0, val);
val = cv::v_expand_high(sub);
diff1 = cv::v_add(diff1, val);
}
// 16-pixel version: widen both inputs to 16 bit and accumulate (end - start) for lanes 0-7
// into diff0/diff1 and for lanes 8-15 into diff2/diff3.
inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
cv::v_int32x4 &diff3) {
computeSumDiff(cv::v_expand_low(start), cv::v_expand_low(end), diff0, diff1);
computeSumDiff(cv::v_expand_high(start), cv::v_expand_high(end), diff2, diff3);
}
// Add the lane-wise difference (end - start), computed in signed 32 bit, to diff0.
inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
cv::v_int32x4 &diff0) {
const cv::v_int32x4 vStart = cv::v_reinterpret_as_s32(start);
const cv::v_int32x4 vEnd = cv::v_reinterpret_as_s32(end);
const cv::v_int32x4 sub = cv::v_sub(vEnd, vStart);
diff0 = cv::v_add(diff0, sub);
}
// Square start and end in place (both arguments are overwritten), then accumulate the
// difference of the squares: lower four lanes into diff0, upper four into diff1.
inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
cv::v_int32x4 &diff1) {
start = cv::v_mul(start, start);
end = cv::v_mul(end, end);
computeSqSumDiff(cv::v_expand_low(start), cv::v_expand_low(end), diff0);
computeSqSumDiff(cv::v_expand_high(start), cv::v_expand_high(end), diff1);
}
// 16-pixel version: widen both inputs to 16 bit and accumulate the difference of squares for
// lanes 0-7 into diff0/diff1 and for lanes 8-15 into diff2/diff3.
inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
cv::v_int32x4 &diff3) {
// Local copies are needed because the 16-bit overload squares its arguments in place.
auto vStart = cv::v_expand_low(start);
auto vEnd = cv::v_expand_low(end);
computeSqSumDiff(vStart, vEnd, diff0, diff1);
vStart = cv::v_expand_high(start);
vEnd = cv::v_expand_high(end);
computeSqSumDiff(vStart, vEnd, diff2, diff3);
}
// Writes a running (prefix) sum of four integer deltas into ptr[0..3].
//
// The accumulation is seeded with ptr[-1], so the element just before ptr must be
// readable and already hold the previous total: ptr[k] = ptr[k - 1] + val[k].
inline void v_expand_store(double *ptr, const std::array<int, 4> &val) {
    double running = ptr[ -1 ];
    for (int k = 0; k < 4; ++k) {
        running = running + val[ k ];
        ptr[ k ] = running;
    }
}
void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) {
constexpr auto blockSize = simdSize(cv::v_uint8);
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
std::array<int, 4> buf{};
int i = 1;
for (; i < sumWidth - blockSize; i += blockSize) {
cv::v_int32x4 diff0 = cv::v_setzero_s32();
cv::v_int32x4 diff1 = cv::v_setzero_s32();
cv::v_int32x4 diff2 = cv::v_setzero_s32();
cv::v_int32x4 diff3 = cv::v_setzero_s32();
cv::v_int32x4 diff10 = cv::v_setzero_s32();
cv::v_int32x4 diff11 = cv::v_setzero_s32();
cv::v_int32x4 diff12 = cv::v_setzero_s32();
cv::v_int32x4 diff13 = cv::v_setzero_s32();
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
auto vStart = cv::v_load(startPtr);
auto vEnd = cv::v_load(endPtr);
computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
}
auto *sumPtrStart = sumPtr + i;
cv::v_store(buf.data(), diff0);
v_expand_store(sumPtrStart, buf);
cv::v_store(buf.data(), diff1);
v_expand_store(sumPtrStart + 4, buf);
cv::v_store(buf.data(), diff2);
v_expand_store(sumPtrStart + 8, buf);
cv::v_store(buf.data(), diff3);
v_expand_store(sumPtrStart + 12, buf);
auto *sqSumPtrStart = sqSumPtr + i;
cv::v_store(buf.data(), diff10);
v_expand_store(sqSumPtrStart, buf);
cv::v_store(buf.data(), diff11);
v_expand_store(sqSumPtrStart + 4, buf);
cv::v_store(buf.data(), diff12);
v_expand_store(sqSumPtrStart + 8, buf);
cv::v_store(buf.data(), diff13);
v_expand_store(sqSumPtrStart + 12, buf);
}
for (; i < sumWidth; i++) {
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
auto *sumPtrStart = sumPtr + i;
sumPtrStart[ 0 ] = sumPtrStart[ -1 ] + partSum;
auto *sqSumPtrStart = sqSumPtr + i;
sqSumPtrStart[ 0 ] = sqSumPtrStart[ -1 ] + partSqSum;
}
}
void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
std::size_t sumStep, double *sqSum, std::size_t sqSumStep) {
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : vRegion) {
auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col;
auto *endPtr = startPtr + rle.length * srcStep;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
sumPtr[ 0 ] = *(sumPtr - sumStep) + partSum;
sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
}
// Builds the sum and squared-sum tables for every placement of the region inside src.
//
// Both outputs are (re)allocated as CV_64FC1 with size
// src.size() - templateSize + (1, 1). Element (0, 0) is computed directly with
// computeSum over hRegion; every other element is derived incrementally: shiftH fills
// the rest of a row from its first element, and shiftV derives the first element of
// the next row from the first element of the current one.
//
// src           source image; its data is read as 8-bit pixels using src.step
//               NOTE(review): presumably CV_8UC1 - the type is not checked here, confirm
// sum, sqSum    output tables, overwritten
// templateSize  size subtracted from the source size to get the table size
// hRegion       horizontal runs used for the first element and for shiftH
// vRegion       vertical runs used for shiftV
//
// Raises cv::Exception (via CV_Assert) when the resulting table would be empty, i.e.
// when src is smaller than templateSize in either dimension.
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
                 const HRegion &hRegion, const VRegion &vRegion) {
    const auto size = src.size() - templateSize + cv::Size(1, 1);
    // With a zero-sized result create() leaves the data pointers null and the writes to
    // element (0, 0) below would dereference them, so fail loudly instead.
    CV_Assert(size.width > 0 && size.height > 0);

    sum.create(size, CV_64FC1);
    sqSum.create(size, CV_64FC1);

    const auto *srcPtr = src.data;
    auto *sumPtr = reinterpret_cast<double *>(sum.data);
    auto *sqSumPtr = reinterpret_cast<double *>(sqSum.data);
    // step1() is the row stride in elements, matching the double* arithmetic in the helpers.
    const auto sumStep = sum.step1();
    const auto sqSumStep = sqSum.step1();

    // compute first
    uint64 sum0 = 0;
    uint64 sqSum0 = 0;
    computeSum(src, hRegion, sum0, sqSum0);
    sumPtr[ 0 ] = static_cast<double>(sum0);
    sqSumPtr[ 0 ] = static_cast<double>(sqSum0);

    for (int y = 0; y < size.height; y++) {
        // Row y already has its first element; extend it to the right.
        shiftH(srcPtr, src.step, hRegion, y, sumPtr, sumStep, sum.cols, sqSumPtr, sqSumStep);
        // Seed the first element of the next row, if there is one.
        if (y + 1 < size.height) {
            shiftV(srcPtr, src.step, vRegion, y + 1, sumPtr, sumStep, sqSumPtr, sqSumStep);
        }
    }
}

8
sum.h
View File

@ -1,8 +0,0 @@
#pragma once
#include "privateType.h"
#include <opencv2/opencv.hpp>
// Fills `sum` and `sqSum` with one value per placement of the region inside `src`.
// Both outputs are created as CV_64FC1 with size src.size() - templateSize + (1, 1).
// hRegion supplies the horizontal runs and vRegion the vertical runs used to update
// the values incrementally from one position to the next.
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
const HRegion &hRegion, const VRegion &vRegion);