52 Commits

Author SHA1 Message Date
ad95fac40c misc 2025-05-08 21:26:11 +08:00
699169140a static check 2025-05-08 17:20:04 +08:00
5e02244e93 fix:table format 2025-05-08 17:06:00 +08:00
1a49d466fe fix:type 2025-05-08 16:44:08 +08:00
a9d69311a8 update readme 2025-03-27 13:45:00 +08:00
11517dad5c add match bench 2025-03-18 20:59:47 +08:00
5035233913 misc 2025-03-13 20:46:05 +08:00
2d467ebbef misc 2025-03-13 15:41:28 +08:00
ca1e8149a1 cmd config 2025-03-13 14:58:42 +08:00
1749a2c720 export library 2025-03-13 11:42:10 +08:00
bcdade9daa Revert "aligned scene"
This reverts commit e5cf60e2547db7e7b27d03080b16bf1db79807bf.
2025-03-13 09:21:37 +08:00
cfdb84093d Revert "aligned scene"
This reverts commit 31c51d77ba8c98ef501d204e2ceb1638cc0e6728.
2025-03-13 09:21:26 +08:00
31c51d77ba aligned scene 2025-03-13 09:10:12 +08:00
e5cf60e254 aligned scene 2025-03-12 23:12:02 +08:00
69226dd197 omp optim 2025-03-02 12:07:13 +08:00
d9a50a40d1 code clean 2025-03-01 18:12:02 +08:00
e6f440006b add readme 2025-02-27 20:58:08 +08:00
fa61990d3c add default image 2025-02-27 20:08:44 +08:00
b891c34176 add image 2025-02-27 20:05:42 +08:00
d119d45722 less step call 2025-02-27 09:39:04 +08:00
25f0b54a5a fix: crash unaligned load 2025-02-27 08:59:29 +08:00
4b26673655 opti dot product 2025-02-26 23:12:21 +08:00
f6ee887e20 misc 2025-02-26 17:56:50 +08:00
9bf943b8d1 uniform dot_product & unroll 2025-02-26 11:36:01 +08:00
3c3b0d4632 aligned model & less ptr call 2025-02-26 11:08:25 +08:00
d7afc907c2 misc 2025-02-26 10:10:08 +08:00
1b2286100e less ptr call 2025-02-26 10:05:18 +08:00
82320082fa static check: small fix 2025-02-26 08:44:11 +08:00
59bbc596bb clear 2025-02-25 22:20:56 +08:00
b041883bca enable openmp support 2025-02-25 17:54:27 +08:00
b4042ca7cf misc 2025-02-25 16:29:54 +08:00
c879408141 misc 2025-02-25 15:18:25 +08:00
d8d888c045 misc 2025-02-25 13:57:14 +08:00
a0f53bc38e misc 2025-02-25 11:08:29 +08:00
7346fc74b9 misc 2025-02-24 22:31:42 +08:00
93220b592e misc 2025-02-24 18:13:27 +08:00
4708a02218 init mask sum 2025-02-24 15:46:05 +08:00
88fdb73ae2 build config 2024-09-10 22:23:17 +08:00
a19993a747 misc 2024-09-03 11:23:37 +08:00
7d7cba4a4c rotation model finish 2024-09-03 09:34:41 +08:00
df05d073c7 misc 2024-09-02 18:12:59 +08:00
bf6fb79131 misc 2024-09-02 17:33:57 +08:00
18a73abd03 default 2024-09-02 14:21:56 +08:00
e54921809c misc 2024-09-02 11:53:42 +08:00
34e8f8bd67 misc 2024-09-02 10:33:14 +08:00
e0202a0fd9 misc 2024-08-28 18:14:29 +08:00
199ea46bb1 misc 2024-08-28 17:19:59 +08:00
f794aeb840 misc 2024-08-27 22:17:29 +08:00
1efe5f11ac misc 2024-08-27 17:56:17 +08:00
1081c707af misc 2024-08-26 17:28:35 +08:00
489d5428e3 add template rotation 2024-08-26 15:19:23 +08:00
dea266853c misc 2024-08-25 18:03:33 +08:00
22 changed files with 768 additions and 1287 deletions

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.12)
project(match)
find_package(OpenCV 4.8 REQUIRED)
find_package(OpenCV REQUIRED)
option(ENABLE_OPENMP "enable openmp" OFF)
if(ENABLE_OPENMP)
@ -21,28 +21,24 @@ endif(ENABLE_OPENMP)
add_library(algo SHARED
grayMatch.h
grayMatch.cpp
serialize.cpp
sum.h
sum.cpp
privateType.h
apiExport.h
integral.h
integral.cpp
)
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
target_compile_options(algo PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wl,--exclude-libs,ALL -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
#$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Darwin>>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
)
target_compile_definitions(algo PUBLIC API_EXPORTS
target_compile_definitions(algo PRIVATE API_EXPORTS
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
)
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
#==============================================================
#exe
#==============================================================
@ -58,3 +54,5 @@ target_compile_options(${PROJECT_NAME} PRIVATE
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
)
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")

24
LICENSE
View File

@ -1,24 +0,0 @@
BSD 2-Clause License
Copyright (c) 2024, SurfaceMan
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,20 +1,13 @@
# Template match with gray model(ncc)
## Note: branch [feature-rotate-model](https://github.com/SurfaceMan/gray_match/tree/feature-rotate-model) method 2x faster at matching!
## highlights:
1. The original code is based on [Fastest_Image_Pattern_Matching](https://github.com/DennisLiu1993/Fastest_Image_Pattern_Matching); check out tag [v1.0](https://github.com/SurfaceMan/gray_match/releases/tag/v1.0) for more details.
2. The SIMD matching process was refactored with OpenCV [Universal intrinsics](https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html) and has been tested on x86_64 (SSE), ARM (NEON), and LoongArch (LSX).
3. support model save/load as binary file
4. provide pure c interface
5. support openmp
6. position with 3x3 subpixel interpolation
## usage:
all you need can be found in [main.cpp](main.cpp)
## gallery:
![sample](img/result.png)
## rotate-model vs main branch
| method | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
|-----------|----------|------------------|---------------------------|
| train | 1 | 680 | 1/680 |
| match | 31 | 16 | 2 |
| train-omp | 1 | 160 | 1/160 |
| match-omp | 12 | 6 | 2 |
Result: the main branch method trains the model very fast (1 ms) and is parameter-free; the rotate-model method is 2x faster at matching.

View File

@ -1 +0,0 @@
1. The OpenCV intrinsics API [changed](https://github.com/opencv/opencv/pull/24371) since 4.9; the code needs refactoring.

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,12 @@
#ifndef GRAY_MATCH_H
#define GRAY_MATCH_H
#include <opencv2/opencv.hpp>
#include "apiExport.h"
struct Model;
using Model_t = Model *;
struct Pose {
float x;
float y;
@ -14,93 +14,11 @@ struct Pose {
float score;
};
/**
* @brief train match model
* @param data image data
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
* @param roiHeight rectangle roi height
* @param levelNum pyramid levels (> 0:user setting,-1:auto)
* @return
*/
API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height, int channels,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth,
int roiHeight, int levelNum);
/**
* @brief match model
* @param data image data
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
* @param roiHeight rectangle roi height
* @param model trained model
* @param count in(max detect count)/out(found count)
* @param poses pose array inited with size not less than count
* @param level match start at which level (level>=0 && level<modelLevel-1,-1:auto)
* @param startAngle rotation start angle
* @param spanAngle rotation angle range
* @param maxOverlap overlap threshold
* @param minScore minimum matched score
* @param subpixel compute subpixel result
* @return
*/
API_PUBLIC void matchModel(const unsigned char *data, int width, int height, int channels,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth, int roiHeight,
Model_t model, int *count, Pose *poses, int level, double startAngle,
double spanAngle, double maxOverlap, double minScore, int subpixel);
API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
double angleStep);
/**
* @brief get trained model levels
* @param model
* @return pyramid level
*/
API_PUBLIC int modelLevel(Model_t model);
API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
int level, double startAngle, double spanAngle, double maxOverlap,
double minScore, int subpixel);
/**
* @brief get trained model image
* @param model
* @param level pyramid level index(level>=0 && level<modelLevel-1)
* @param data image data buffer(need allocated), can input nullptr to query width/height/channels
* @param length buffer length not less than width*height*channels
* @param width image width, can input nullptr
* @param height image height, can input nullptr
* @param channels image channels, can input nullptr
* @return
*/
API_PUBLIC void modelImage(Model_t model, int level, unsigned char *data, int length, int *width,
int *height, int *channels);
/**
* @brief free model
* @param model
* @return
*/
API_PUBLIC void freeModel(Model_t *model);
/**
* @brief serialize model to buffer
* @param model
* @param buffer need allocated, can input nullptr to query size
* @param size in(buffer size)/out(written size)
* @return true(success)false(failed)
*/
API_PUBLIC bool serialize(Model_t model, unsigned char *buffer, int *size);
/**
* @brief deserialize model
* @param buffer
* @param size buffer size
* @return model
*/
API_PUBLIC Model_t deserialize(unsigned char *buffer, int size);
#endif // GRAY_MATCH_H
#endif // GRAY_MATCH_H

BIN
img/3.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

BIN
img/h.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 932 KiB

BIN
img/i.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 932 KiB

BIN
img/j.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 932 KiB

BIN
img/k.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 932 KiB

BIN
img/l.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 932 KiB

BIN
img/model3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

BIN
img/model3_src2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 336 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 374 KiB

View File

@ -1,180 +0,0 @@
#include "integral.h"
#include "privateType.h"
#include <opencv2/core/hal/intrin.hpp>
/**
 * @brief Convert a vector of signed 32-bit lanes to double precision.
 * @param src  integer lanes to convert
 * @param low  receives the result of cv::v_cvt_f64(src)
 * @param high receives the result of cv::v_cvt_f64_high(src)
 */
inline void expand(const cv::v_int32 &src, cv::v_float64 &low, cv::v_float64 &high) {
    high = cv::v_cvt_f64_high(src);
    low  = cv::v_cvt_f64(src);
}
/**
 * @brief Emit one vector's worth of integral-image values for the plain sum.
 * @param src     widened (16-bit) pixel values of the current row
 * @param dst     output position in the current integral row
 * @param prevDst matching position in the integral row above
 * @param pre     running row total carried in from earlier vectors; updated on return
 *
 * NOTE(review): the shift sequence 1, 2, 4 forms a complete running sum only for
 * 8 lanes, i.e. it assumes cv::v_uint16 is 128 bits wide -- confirm for wider SIMD builds.
 */
inline void integralSum(const cv::v_uint16 &src, double *dst, const double *prevDst,
cv::v_uint32 &pre) {
// In-register running sum: add the vector to copies of itself shifted by 1, 2 and 4 lanes.
auto sum = cv::v_add(src, cv::v_rotate_left<1>(src));
sum = cv::v_add(sum, cv::v_rotate_left<2>(sum));
sum = cv::v_add(sum, cv::v_rotate_left<4>(sum));
cv::v_uint32 v1;
cv::v_uint32 v2;
// Widen to 32 bits and add the total accumulated by earlier vectors of this row.
cv::v_expand(sum, v1, v2);
v1 = cv::v_add(v1, pre);
v2 = cv::v_add(v2, pre);
// The last lane is the row total so far; broadcast it as the carry for the next call.
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
cv::v_float64 v3;
cv::v_float64 v4;
// Convert to double, add the value from the row above, and store four double vectors.
expand(cv::v_reinterpret_as_s32(v1), v3, v4);
cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
cv::v_store(dst + simdSize(cv::v_float64),
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
expand(cv::v_reinterpret_as_s32(v2), v3, v4);
cv::v_store(dst + simdSize(cv::v_float64) * 2,
cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
cv::v_store(dst + simdSize(cv::v_float64) * 3,
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
}
/**
 * @brief Emit one vector's worth of integral-image values for the squared sum.
 * @param src     16-bit lanes that already hold squared pixel values (the two-vector
 *                overload squares them before calling this one)
 * @param dst     output position in the current integral row
 * @param prevDst matching position in the integral row above
 * @param pre     running row total carried in from earlier vectors; updated on return
 *
 * Unlike the plain-sum variant, lanes are widened to 32 bits before any addition.
 * NOTE(review): presumably this is because sums of squares would not fit in 16 bits;
 * the lane bookkeeping below also appears to assume 128-bit vectors -- confirm.
 */
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(src, v1, v2);
{
// Step 1: add the one-lane-shifted copy (the shift is done on 16-bit lanes, the add on 32-bit).
auto shift1 = cv::v_rotate_left<1>(src);
cv::v_uint32 v3;
cv::v_uint32 v4;
cv::v_expand(shift1, v3, v4);
v1 = cv::v_add(v1, v3);
v2 = cv::v_add(v2, v4);
// Step 2: add the copy shifted by two lanes; the upper half borrows lanes from the lower half.
v4 = cv::v_extract<2>(v1, v2);
v2 = cv::v_add(v2, v4);
v3 = cv::v_rotate_left<2>(v1);
v1 = cv::v_add(v1, v3);
// Step 3: fold in the carry, then add the lower half into the upper half.
v1 = cv::v_add(v1, pre);
v2 = cv::v_add(v2, v1);
// Broadcast the last lane as the carry for the next call.
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
}
cv::v_float64 v3;
cv::v_float64 v4;
// Convert to double, add the value from the row above, and store four double vectors.
expand(cv::v_reinterpret_as_s32(v1), v3, v4);
cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
cv::v_store(dst + simdSize(cv::v_float64),
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
expand(cv::v_reinterpret_as_s32(v2), v3, v4);
cv::v_store(dst + simdSize(cv::v_float64) * 2,
cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
cv::v_store(dst + simdSize(cv::v_float64) * 3,
cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
}
/*
inline void integralSqSum(cv::v_uint32 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
src += cv::v_rotate_left<1>(src);
src += cv::v_rotate_left<2>(src);
src += pre;
pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(src));
cv::v_float64 v1;
cv::v_float64 v2;
expand(cv::v_reinterpret_as_s32(src), v1, v2);
cv::v_store(dst, v1 + cv::v_load(prevDst));
cv::v_store(dst + simdSize(cv::v_float64), v2 + cv::v_load(prevDst +
simdSize(cv::v_float64)));
}
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(src, v1, v2);
integralSqSum(v1, dst, prevDst, pre);
integralSqSum(v2, dst + simdSize(cv::v_uint32), prevDst + simdSize(cv::v_uint32),
pre);
}
*/
/**
 * @brief Process two consecutive 16-bit vectors of one row for the plain sum.
 * @param v1      first (lower) vector of widened pixels
 * @param v2      second (upper) vector of widened pixels
 * @param dst     output position for the first vector; the second follows directly
 * @param prevDst matching position in the integral row above
 * @param pre     running row carry, threaded through both calls
 */
inline void integralSum(const cv::v_uint16 &v1, const cv::v_uint16 &v2, double *dst,
                        const double *prevDst, cv::v_uint32 &pre) {
    constexpr auto lanes = simdSize(cv::v_uint16);

    integralSum(v1, dst, prevDst, pre);
    integralSum(v2, &dst[ lanes ], &prevDst[ lanes ], pre);
}
/**
 * @brief Process two consecutive 16-bit vectors of one row for the squared sum.
 *
 * Both input vectors are squared in place (cv::v_mul_wrap) before being scanned,
 * so the caller's v1/v2 are modified.
 *
 * @param v1      first (lower) vector of widened pixels; overwritten with its squares
 * @param v2      second (upper) vector of widened pixels; overwritten with its squares
 * @param dst     output position for the first vector; the second follows directly
 * @param prevDst matching position in the integral row above
 * @param pre     running row carry, threaded through both calls
 */
inline void integralSqSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
                          cv::v_uint32 &pre) {
    constexpr auto lanes = simdSize(cv::v_uint16);

    v1 = cv::v_mul_wrap(v1, v1);
    v2 = cv::v_mul_wrap(v2, v2);

    integralSqSum(v1, dst, prevDst, pre);
    integralSqSum(v2, &dst[ lanes ], &prevDst[ lanes ], pre);
}
/**
 * @brief Build the integral image and squared integral image of src.
 * @param src   input image; read as one unsigned byte per pixel
 * @param sum   output, (rows + 1) x (cols + 1), CV_64FC1
 * @param sqSum output, (rows + 1) x (cols + 1), CV_64FC1
 *
 * Row 0 and column 0 of both outputs are zero. Each row is processed with SIMD in
 * blocks of simdSize(cv::v_uint8) pixels; the remaining columns are finished by a
 * scalar pass.
 *
 * NOTE(review): the vector loops read a full block from src and write a full block
 * of doubles to the outputs -- confirm callers provide images for which that is in bounds.
 */
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum) {
const auto size = src.size() + cv::Size(1, 1);
sum.create(size, CV_64FC1);
sqSum.create(size, CV_64FC1);
// Zero the first output row (step[0] is the row length in bytes).
memset(sum.data, 0, sum.step[ 0 ]);
memset(sqSum.data, 0, sqSum.step[ 0 ]);
const auto *srcStart = src.data;
const auto srcStep = src.step[ 0 ];
// Skip the zero row and zero column: element (1, 1) of each output.
auto *sumStart = reinterpret_cast<double *>(sum.data) + sum.step1() + 1;
const auto sumStep = sum.step[ 0 ] / sum.step[ 1 ];
auto *sqSumStart = reinterpret_cast<double *>(sqSum.data) + sqSum.step1() + 1;
const auto sqSumStep = sqSum.step[ 0 ] / sqSum.step[ 1 ];
// Last x (exclusive) at which a whole SIMD block still fits.
const auto end = size.width - simdSize(cv::v_uint8);
// Pass 1: plain sum, vectorised part of every row.
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sumPtr = sumStart + sumStep * y;
// NOTE(review): for y == 0 this relies on unsigned wrap-around of sumStep * (y - 1)
// to address the zero row above -- confirm this is intended.
const auto *preSumPtr = sumStart + sumStep * (y - 1);
// Zero column to the left of the first pixel.
sumPtr[ -1 ] = 0;
cv::v_uint32 prevSum = cv::vx_setzero_u32();
for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
cv::v_uint16 v1;
cv::v_uint16 v2;
cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
integralSum(v1, v2, sumPtr + x, preSumPtr + x, prevSum);
}
}
// Pass 2: squared sum, vectorised part of every row.
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sqSumPtr = sqSumStart + sqSumStep * y;
auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
sqSumPtr[ -1 ] = 0;
cv::v_uint32 prevSqSum = cv::vx_setzero_u32();
for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
cv::v_uint16 v1;
cv::v_uint16 v2;
cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
integralSqSum(v1, v2, sqSumPtr + x, preSqSumPtr + x, prevSqSum);
}
}
// Pass 3: scalar tail for the columns left over after the last full block, using
// I(x, y) = I(x-1, y) + v + I(x, y-1) - I(x-1, y-1).
const auto start = src.cols - src.cols % simdSize(cv::v_uint8);
for (int y = 0; y < src.rows; y++) {
auto *srcPtr = srcStart + srcStep * y;
auto *sumPtr = sumStart + sumStep * y;
auto *sqSumPtr = sqSumStart + sqSumStep * y;
const auto *preSumPtr = sumStart + sumStep * (y - 1);
const auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
for (int x = start; x < src.cols; x++) {
const auto val = srcPtr[ x ];
const auto sqVal = val * val;
sumPtr[ x ] = sumPtr[ x - 1 ] + val + preSumPtr[ x ] - preSumPtr[ x - 1 ];
sqSumPtr[ x ] = sqSumPtr[ x - 1 ] + sqVal + preSqSumPtr[ x ] - preSqSumPtr[ x - 1 ];
}
}
}

View File

@ -1,8 +0,0 @@
#ifndef INTEGRAL_H
#define INTEGRAL_H
#include <opencv2/opencv.hpp>
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum);
#endif // INTEGRAL_H

View File

@ -1,9 +1,9 @@
#include "grayMatch.h"
#include <fstream>
#include <iostream>
#include <opencv2/core/utility.hpp>
#include <opencv2/opencv.hpp>
#include <string>
int main(int argc, const char *argv[]) {
const std::string keys = "{model m || model image}"
@ -24,8 +24,8 @@ int main(int argc, const char *argv[]) {
return 0;
}
auto srcFile = std::string(IMG_DIR) + "/3.bmp";
auto dstFile = std::string(IMG_DIR) + "/h.bmp";
auto srcFile = std::string(IMG_DIR) + "/model3.png";
auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
if (cmd.has("model"))
srcFile = cmd.get<std::string>("model");
if (cmd.has("scene"))
@ -37,66 +37,19 @@ int main(int argc, const char *argv[]) {
return -1;
}
const std::string modelName("model.bin");
{
auto t0 = cv::getTickCount();
auto model = trainModel(src.data, src.cols, src.rows, src.channels(),
static_cast<int>(src.step), 0, 0, src.cols, src.rows, -1);
auto t1 = cv::getTickCount();
// get size
int size;
serialize(model, nullptr, &size);
// serialize to buffer
std::vector<uchar> buffer(size);
serialize(model, buffer.data(), &size);
// write to file
std::ofstream ofs(modelName, std::ios::binary | std::ios::out);
if (!ofs.is_open()) {
return -1;
}
ofs.write(reinterpret_cast<const char *>(buffer.data()), size);
freeModel(&model);
auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << std::endl;
}
int count = 70;
std::vector<Pose> poses(count);
Model_t model;
auto score = cmd.get<float>("threshold");
{
// open file
std::ifstream ifs(modelName, std::ios::binary | std::ios::in);
if (!ifs.is_open()) {
return -2;
}
// get size
ifs.seekg(0, std::ios::end);
auto size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
// read to buffer
std::vector<uchar> buffer(size);
ifs.read(reinterpret_cast<char *>(buffer.data()), size);
// deserialize from buffer
model = deserialize(buffer.data(), static_cast<int>(buffer.size()));
auto t2 = cv::getTickCount();
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0, 0,
dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
auto t3 = cv::getTickCount();
auto matchCost = static_cast<double>(t3 - t2) / cv::getTickFrequency();
std::cout << "match(s):" << matchCost << std::endl;
}
auto t0 = cv::getTickCount();
auto model = trainModel(src, -1, 0, 360, -1);
auto t1 = cv::getTickCount();
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
auto t2 = cv::getTickCount();
const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
for (int i = 0; i < count; i++) {
const auto &pose = poses[ i ];
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
@ -107,8 +60,7 @@ int main(int argc, const char *argv[]) {
auto start = cv::getTickCount();
for (int i = 0; i < times; i++) {
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0,
0, dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
count = 70;
}
auto end = cv::getTickCount();

View File

@ -2,40 +2,49 @@
#include <opencv2/core.hpp>
// One horizontal run of pixels: `length` consecutive pixels in row `row`,
// beginning at column `startColumn`.
struct HRLE {
int row = -1;
int startColumn = -1;
int length = 0;
};
// One vertical run of pixels: `length` consecutive pixels in column `col`,
// beginning at row `startRow`.
struct VRLE {
int col = -1;
int startRow = -1;
int length = 0;
};
using HRegion = std::vector<HRLE>;
using VRegion = std::vector<VRLE>;
// Data stored for a single template image.
// NOTE(review): field meanings below are inferred from names only -- confirm
// against the code that fills them in.
struct Template {
cv::Mat img;
// The same region described as horizontal runs and as vertical runs.
HRegion hRegion;
VRegion vRegion;
cv::RotatedRect rect;
// Presumably precomputed statistics of `img` over the region -- TODO confirm.
double mean = 0;
double normal = 0;
double invArea = 0;
};
// A group of templates sharing one angle step.
// NOTE(review): presumably one Layer per pyramid level with one Template per
// rotation angle -- confirm against the training code.
struct Layer {
double angleStep = 0;
std::vector<Template> templates;
};
struct Model {
std::vector<cv::Mat> pyramids;
std::vector<cv::Scalar> mean;
std::vector<double> normal;
std::vector<double> invArea;
std::vector<uchar> equal1;
uchar borderColor = 0;
double startAngle = 0;
double stopAngle = 0;
double angleStep = 0;
void clear() {
pyramids.clear();
normal.clear();
invArea.clear();
mean.clear();
equal1.clear();
}
void resize(const std::size_t size) {
normal.resize(size);
invArea.resize(size);
mean.resize(size);
equal1.resize(size);
}
void reserve(const std::size_t size) {
pyramids.reserve(size);
normal.reserve(size);
invArea.reserve(size);
mean.reserve(size);
equal1.reserve(size);
}
cv::Size srcSize;
std::vector<Layer> layers;
};
#if CV_VERSION_MAJOR >= 4 && CV_VERSION_MINOR >= 8
#define simdSize(type) cv::VTraits<type>::nlanes
#else
#define simdSize(type) type::nlanes
#endif
#endif

View File

@ -1,228 +0,0 @@
#include "grayMatch.h"
#include "privateType.h"
#include <opencv2/core/hal/intrin.hpp>
/**
 * @brief Base class for the (de)serialization visitors.
 *
 * A Buffer walks over a Model field by field through the overloaded operator&.
 * Derived classes decide what each visit does (count bytes, write, or read);
 * m_size tracks the byte offset reached so far inside m_data.
 */
class Buffer {
public:
    Buffer(const int size_, unsigned char *data_)
        : m_size(size_)
        , m_data(data_) {}

    virtual ~Buffer() = default;

    // Per-type visitors implemented by the concrete buffers.
    virtual void operator&(uchar &val)                   = 0;
    virtual void operator&(std::vector<cv::Mat> &val)    = 0;
    virtual void operator&(std::vector<cv::Scalar> &val) = 0;
    virtual void operator&(std::vector<double> &val)     = 0;
    virtual void operator&(std::vector<uchar> &val)      = 0;

    /// Visit every serialized field of a Model, in the fixed on-disk order.
    void operator&(Model &val) {
        Buffer &self = *this;
        self & val.pyramids;
        self & val.mean;
        self & val.normal;
        self & val.invArea;
        self & val.equal1;
        self & val.borderColor;
    }

    /// Number of bytes processed so far.
    [[nodiscard]] int count() const {
        return m_size;
    }

protected:
    int            m_size = 0;
    unsigned char *m_data = nullptr;
};
/**
 * @brief Copy `size` raw bytes from src to dst.
 * @param dst  destination buffer, at least `size` bytes
 * @param src  source bytes
 * @param size number of bytes to copy
 */
void binWrite(void *const dst, const void *src, const int size) {
    const auto byteCount = static_cast<std::size_t>(size);
    memcpy(dst, src, byteCount);
}
/**
 * @brief No-op stand-in for binWrite with the same signature.
 *
 * Used as the sink of the size-counting buffer: nothing is copied, so only the
 * running byte count kept by the caller advances.
 */
void fakeWrite(void *const /*dst*/, const void * /*src*/, const int /*size*/) {}
using Write = void (*)(void *, const void *, int);
/**
 * @brief Output-side Buffer whose byte sink is selected at compile time.
 *
 * Every visit emits bytes through `write` at the current offset and then advances
 * the offset. Layout produced:
 *   - uchar:               1 byte
 *   - vector<cv::Mat>:     int count, then per image: int cols, int rows, and
 *                          `cols` bytes for each row
 *   - vector<cv::Scalar>:  int count, then 4 doubles per element
 *   - vector<double>:      int count, then the raw doubles
 *   - vector<uchar>:       int count, then the raw bytes
 *
 * @tparam write function receiving (destination, source, byteCount)
 */
template <Write write> class OutBuffer final : public Buffer {
public:
    explicit OutBuffer(unsigned char *const data_)
        : Buffer(0, data_) {}

    void operator&(uchar &val) override {
        put(&val, static_cast<int>(sizeof(val)));
    }

    void operator&(std::vector<cv::Mat> &val) override {
        putCount(val.size());
        for (auto &image : val) {
            writeElement(image);
        }
    }

    /// Emit one image: width, height, then `cols` bytes of every row.
    void writeElement(cv::Mat &val) {
        put(&val.cols, static_cast<int>(sizeof(int)));
        put(&val.rows, static_cast<int>(sizeof(int)));
        for (int row = 0; row < val.rows; row++) {
            put(val.ptr<unsigned char>(row), val.cols);
        }
    }

    void operator&(std::vector<cv::Scalar> &val) override {
        putCount(val.size());
        for (auto &scalar : val) {
            writeElement(scalar);
        }
    }

    /// Emit the four double components of a scalar.
    void writeElement(const cv::Scalar &val) {
        put(val.val, static_cast<int>(sizeof(double)) * 4);
    }

    void operator&(std::vector<double> &val) override {
        const int count = putCount(val.size());
        put(val.data(), static_cast<int>(sizeof(double)) * count);
    }

    void operator&(std::vector<uchar> &val) override {
        const int count = putCount(val.size());
        put(val.data(), static_cast<int>(sizeof(uchar)) * count);
    }

private:
    /// Hand `bytes` bytes to the sink at the current offset, then advance the offset.
    void put(const void *src, const int bytes) {
        write(m_data + m_size, src, bytes);
        m_size += bytes;
    }

    /// Emit a container length as an int and return it.
    int putCount(const std::size_t elements) {
        const int count = static_cast<int>(elements);
        put(&count, static_cast<int>(sizeof(count)));
        return count;
    }
};
using SizeCountBuffer = OutBuffer<fakeWrite>;
using WriteBuffer = OutBuffer<binWrite>;
class ReadBuffer final : public Buffer {
public:
explicit ReadBuffer(unsigned char *data_)
: Buffer(0, data_) {}
void operator&(uchar &val) override {
memcpy(&val, m_data + m_size, sizeof(uchar));
m_size += static_cast<int>(sizeof(uchar));
}
void operator&(std::vector<cv::Mat> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
for (auto &element : val) {
read(element);
}
}
void read(cv::Mat &val) {
int width = 0;
memcpy(&width, m_data + m_size, sizeof(int));
m_size += static_cast<int>(sizeof(int));
int height = 0;
memcpy(&height, m_data + m_size, sizeof(int));
m_size += static_cast<int>(sizeof(int));
const int alignedWidth = static_cast<int>(cv::alignSize(width, simdSize(cv::v_uint8)));
const auto img = cv::Mat::zeros(height, alignedWidth, CV_8UC1);
val = img(cv::Rect(0, 0, width, height));
for (int y = 0; y < height; y++) {
auto *ptr = val.ptr<uchar>(y);
memcpy(ptr, m_data + m_size, width);
m_size += width;
}
}
void operator&(std::vector<cv::Scalar> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
for (auto &element : val) {
read(element);
}
}
void read(cv::Scalar &val) {
memcpy(val.val, m_data + m_size, sizeof(double) * 4);
m_size += static_cast<int>(sizeof(double)) * 4;
}
void operator&(std::vector<double> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
memcpy(val.data(), m_data + m_size, sizeof(double) * count);
m_size += static_cast<int>(sizeof(double)) * count;
}
void operator&(std::vector<uchar> &val) override {
int count = 0;
memcpy(&count, m_data + m_size, sizeof(int));
val.resize(count);
m_size += static_cast<int>(sizeof(count));
memcpy(val.data(), m_data + m_size, sizeof(bool) * count);
m_size += static_cast<int>(sizeof(uchar)) * count;
}
};
/// Run the given buffer (counter, writer or reader) over every field of the model.
void operation(Buffer *buf, Model &model) {
    buf->operator&(model);
}
/**
 * @brief Serialize a model into a caller-provided buffer.
 * @param model  model to serialize
 * @param buffer destination, or nullptr to only query the required size
 * @param size   in: capacity of `buffer`; out: required size (query mode) or number
 *               of bytes written (write mode). Set to 0 when the model is null or
 *               the buffer is too small.
 * @return true on success, false if `size` or `model` is null or the buffer is too small
 */
bool serialize(Model *const model, unsigned char *buffer, int *size) {
    if (nullptr == size) {
        return false;
    }

    if (nullptr == model) {
        *size = 0;
        return false;
    }

    // Dry run: walk the model without copying anything to learn the byte count.
    SizeCountBuffer counter(buffer);
    operation(&counter, *model);
    const int required = counter.count();

    if (nullptr == buffer) {
        *size = required;
        return true;
    }

    if (required > *size) {
        *size = 0;
        return false;
    }

    WriteBuffer writer(buffer);
    operation(&writer, *model);

    // Report how much was actually written, as the API documents; previously the
    // caller's capacity value was left untouched on success.
    *size = writer.count();
    return true;
}
/**
 * @brief Rebuild a model from a serialized buffer.
 * @param buffer serialized bytes
 * @param size   length of `buffer`; only checked for being positive
 * @return newly allocated model owned by the caller, or nullptr when the buffer
 *         is null or `size` < 1
 *
 * NOTE(review): `size` is not used to bound the reads, so truncated or corrupted
 * input is not detected here.
 */
Model_t deserialize(unsigned char *buffer, const int size) {
    if (size < 1 || nullptr == buffer) {
        return nullptr;
    }

    ReadBuffer reader(buffer);
    auto      *model = new Model;
    try {
        operation(&reader, *model);
    } catch (...) {
        // Reading allocates (vector resize, image creation) and may throw;
        // do not leak the half-built model in that case.
        delete model;
        throw;
    }

    return model;
}

240
sum.cpp Normal file
View File

@ -0,0 +1,240 @@
#include "sum.h"
#include <opencv2/core/hal/intrin.hpp>
/// Widen eight 16-bit lanes to 32 bits and add the lower and upper halves lane-wise.
inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) {
    return cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
}
/// Widen four 32-bit lanes to 64 bits and add the lower and upper halves lane-wise.
inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) {
    cv::v_uint64x2 lowerHalf;
    cv::v_uint64x2 upperHalf;
    cv::v_expand(src, lowerHalf, upperHalf);

    const cv::v_uint64x2 folded = cv::v_add(lowerHalf, upperHalf);
    return folded;
}
/**
 * @brief Accumulate the sum and the sum of squares of 16 pixels.
 * @param src   16 unsigned 8-bit pixels
 * @param sum   running sum accumulator (4 x 32-bit lanes), updated in place
 * @param sqSum running squared-sum accumulator (2 x 64-bit lanes), updated in place
 */
inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) {
    // Squares: dot product of the vector with itself, folded down to two 64-bit lanes.
    sqSum = cv::v_add(sqSum, v_add_expand(cv::v_dotprod_expand_fast(src, src)));

    // Plain sum: widen to 16 bits, add the two halves, then fold to four 32-bit lanes.
    const cv::v_uint16x8 pairSums = cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
    sum                           = cv::v_add(sum, v_add_expand(pairSums));
}
/**
 * @brief Sum and squared sum of all pixels of `src` covered by a run-length region.
 * @param src     8-bit single-channel image
 * @param hRegion horizontal runs (row, start column, length) selecting the pixels
 * @param sum     receives the sum of the selected pixels
 * @param sqSum   receives the sum of the squares of the selected pixels
 */
void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) {
    // The vector loop loads fixed 128-bit vectors (cv::v_load -> v_uint8x16), so the
    // stride must be the lane count of that type. Using the native-width cv::v_uint8
    // would skip pixels when OpenCV is configured for wider SIMD.
    constexpr int blockSize = simdSize(cv::v_uint8x16);

    const auto    *srcPtr    = src.data;
    cv::v_uint32x4 vSum      = cv::v_setzero_u32();
    cv::v_uint64x2 vSqSum    = cv::v_setzero_u64();
    uint32_t       partSum   = 0;
    uint64         partSqSum = 0;

    for (const auto &rle : hRegion) {
        const auto *ptr = srcPtr + src.step * rle.row + rle.startColumn;

        // Whole blocks go through SIMD ...
        int i = 0;
        for (; i < rle.length - blockSize; i += blockSize) {
            computeSum(cv::v_load(ptr + i), vSum, vSqSum);
        }

        // ... and the remainder of the run is accumulated scalar.
        // TODO aligned fill 0
        for (; i < rle.length; i++) {
            const auto val = ptr[ i ];
            partSum += val;
            partSqSum += static_cast<ushort>(val) * static_cast<ushort>(val);
        }
    }

    // Widen before the final addition so the two 32-bit partial sums cannot wrap.
    sum   = static_cast<uint64>(cv::v_reduce_sum(vSum)) + partSum;
    sqSum = cv::v_reduce_sum(vSqSum) + partSqSum;
}
/**
 * @brief Accumulate (end - start) for eight 16-bit lanes into two 32-bit accumulators.
 * @param start values leaving the window
 * @param end   values entering the window
 * @param diff0 accumulator for lanes 0..3, updated in place
 * @param diff1 accumulator for lanes 4..7, updated in place
 */
inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) {
    // Subtract as signed 16-bit so a negative difference is representable.
    const cv::v_int16x8 delta =
        cv::v_sub(cv::v_reinterpret_as_s16(end), cv::v_reinterpret_as_s16(start));

    diff0 = cv::v_add(diff0, cv::v_expand_low(delta));
    diff1 = cv::v_add(diff1, cv::v_expand_high(delta));
}
/**
 * @brief Accumulate (end - start) for sixteen 8-bit pixels into four 32-bit accumulators.
 * @param start pixels leaving the window
 * @param end   pixels entering the window
 * @param diff0 accumulator for lanes 0..3
 * @param diff1 accumulator for lanes 4..7
 * @param diff2 accumulator for lanes 8..11
 * @param diff3 accumulator for lanes 12..15
 */
inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                           cv::v_int32x4 &diff3) {
    const cv::v_uint16x8 startLow = cv::v_expand_low(start);
    const cv::v_uint16x8 endLow   = cv::v_expand_low(end);
    computeSumDiff(startLow, endLow, diff0, diff1);

    const cv::v_uint16x8 startHigh = cv::v_expand_high(start);
    const cv::v_uint16x8 endHigh   = cv::v_expand_high(end);
    computeSumDiff(startHigh, endHigh, diff2, diff3);
}
/**
 * @brief Accumulate (end - start) for four 32-bit lanes of already-squared values.
 * @param start squared values leaving the window
 * @param end   squared values entering the window
 * @param diff0 signed accumulator, updated in place
 */
inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
                             cv::v_int32x4 &diff0) {
    const cv::v_int32x4 delta =
        cv::v_sub(cv::v_reinterpret_as_s32(end), cv::v_reinterpret_as_s32(start));
    diff0 = cv::v_add(diff0, delta);
}
/**
 * @brief Square eight 16-bit lanes of start/end and accumulate (end^2 - start^2).
 *
 * `start` and `end` are overwritten with their squares.
 *
 * @param start values leaving the window (replaced by their squares)
 * @param end   values entering the window (replaced by their squares)
 * @param diff0 accumulator for lanes 0..3
 * @param diff1 accumulator for lanes 4..7
 */
inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
                             cv::v_int32x4 &diff1) {
    start = cv::v_mul(start, start);
    end   = cv::v_mul(end, end);

    const cv::v_uint32x4 startLow = cv::v_expand_low(start);
    const cv::v_uint32x4 endLow   = cv::v_expand_low(end);
    computeSqSumDiff(startLow, endLow, diff0);

    const cv::v_uint32x4 startHigh = cv::v_expand_high(start);
    const cv::v_uint32x4 endHigh   = cv::v_expand_high(end);
    computeSqSumDiff(startHigh, endHigh, diff1);
}
/**
 * @brief Accumulate (end^2 - start^2) for sixteen 8-bit pixels into four accumulators.
 * @param start pixels leaving the window
 * @param end   pixels entering the window
 * @param diff0 accumulator for lanes 0..3
 * @param diff1 accumulator for lanes 4..7
 * @param diff2 accumulator for lanes 8..11
 * @param diff3 accumulator for lanes 12..15
 */
inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                             cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                             cv::v_int32x4 &diff3) {
    // The 16-bit overload squares its arguments in place, so hand it mutable copies.
    cv::v_uint16x8 lowStart = cv::v_expand_low(start);
    cv::v_uint16x8 lowEnd   = cv::v_expand_low(end);
    computeSqSumDiff(lowStart, lowEnd, diff0, diff1);

    cv::v_uint16x8 highStart = cv::v_expand_high(start);
    cv::v_uint16x8 highEnd   = cv::v_expand_high(end);
    computeSqSumDiff(highStart, highEnd, diff2, diff3);
}
/**
 * @brief Turn four deltas into four consecutive running totals.
 *
 * Starting from the value stored just before `ptr`, each output is the previous
 * output plus the corresponding delta: ptr[k] = ptr[k - 1] + val[k].
 *
 * @param ptr first of four output slots; ptr[-1] must be valid and already filled
 * @param val the four deltas
 */
inline void v_expand_store(double *ptr, const std::array<int, 4> &val) {
    double running = ptr[ -1 ];
    for (const int delta : val) {
        running += delta;
        *ptr++ = running;
    }
}
void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) {
constexpr auto blockSize = simdSize(cv::v_uint8);
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
std::array<int, 4> buf{};
int i = 1;
for (; i < sumWidth - blockSize; i += blockSize) {
cv::v_int32x4 diff0 = cv::v_setzero_s32();
cv::v_int32x4 diff1 = cv::v_setzero_s32();
cv::v_int32x4 diff2 = cv::v_setzero_s32();
cv::v_int32x4 diff3 = cv::v_setzero_s32();
cv::v_int32x4 diff10 = cv::v_setzero_s32();
cv::v_int32x4 diff11 = cv::v_setzero_s32();
cv::v_int32x4 diff12 = cv::v_setzero_s32();
cv::v_int32x4 diff13 = cv::v_setzero_s32();
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
auto vStart = cv::v_load(startPtr);
auto vEnd = cv::v_load(endPtr);
computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
}
auto *sumPtrStart = sumPtr + i;
cv::v_store(buf.data(), diff0);
v_expand_store(sumPtrStart, buf);
cv::v_store(buf.data(), diff1);
v_expand_store(sumPtrStart + 4, buf);
cv::v_store(buf.data(), diff2);
v_expand_store(sumPtrStart + 8, buf);
cv::v_store(buf.data(), diff3);
v_expand_store(sumPtrStart + 12, buf);
auto *sqSumPtrStart = sqSumPtr + i;
cv::v_store(buf.data(), diff10);
v_expand_store(sqSumPtrStart, buf);
cv::v_store(buf.data(), diff11);
v_expand_store(sqSumPtrStart + 4, buf);
cv::v_store(buf.data(), diff12);
v_expand_store(sqSumPtrStart + 8, buf);
cv::v_store(buf.data(), diff13);
v_expand_store(sqSumPtrStart + 12, buf);
}
for (; i < sumWidth; i++) {
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
auto *sumPtrStart = sumPtr + i;
sumPtrStart[ 0 ] = sumPtrStart[ -1 ] + partSum;
auto *sqSumPtrStart = sqSumPtr + i;
sqSumPtrStart[ 0 ] = sqSumPtrStart[ -1 ] + partSqSum;
}
}
void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
std::size_t sumStep, double *sqSum, std::size_t sqSumStep) {
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : vRegion) {
auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col;
auto *endPtr = startPtr + rle.length * srcStep;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
sumPtr[ 0 ] = *(sumPtr - sumStep) + partSum;
sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
}
/**
 * @brief Compute, for every placement of a run-length-described window inside src,
 *        the sum and squared sum of the pixels it covers.
 *
 * The first placement is summed directly; every other value is derived
 * incrementally by shifting the window right along a row and down to start the
 * next row.
 *
 * @param src          8-bit single-channel image
 * @param sum          output table of sums, size (src - template + 1), CV_64FC1
 * @param sqSum        output table of squared sums, same size and type
 * @param templateSize bounding size of the window
 * @param hRegion      window shape as horizontal runs (used for horizontal shifts)
 * @param vRegion      window shape as vertical runs (used for vertical shifts)
 */
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
                 const HRegion &hRegion, const VRegion &vRegion) {
    const auto resultSize = src.size() - templateSize + cv::Size(1, 1);
    sum.create(resultSize, CV_64FC1);
    sqSum.create(resultSize, CV_64FC1);

    const auto *image       = src.data;
    auto       *sumData     = reinterpret_cast<double *>(sum.data);
    auto       *sqSumData   = reinterpret_cast<double *>(sqSum.data);
    const auto  sumStride   = sum.step1();
    const auto  sqSumStride = sqSum.step1();

    // Seed: the window at the top-left position is summed directly.
    uint64 firstSum;
    uint64 firstSqSum;
    computeSum(src, hRegion, firstSum, firstSqSum);
    sumData[ 0 ]   = static_cast<double>(firstSum);
    sqSumData[ 0 ] = static_cast<double>(firstSqSum);

    const int lastRow = resultSize.height - 1;
    for (int y = 0; y <= lastRow; y++) {
        // Complete row y from its first element ...
        shiftH(image, src.step, hRegion, y, sumData, sumStride, sum.cols, sqSumData, sqSumStride);

        // ... then derive the first element of the next row, if there is one.
        if (y < lastRow) {
            shiftV(image, src.step, vRegion, y + 1, sumData, sumStride, sqSumData, sqSumStride);
        }
    }
}

8
sum.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include "privateType.h"
#include <opencv2/opencv.hpp>
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
const HRegion &hRegion, const VRegion &vRegion);