52 Commits

Author SHA1 Message Date
ad95fac40c misc 2025-05-08 21:26:11 +08:00
699169140a static check 2025-05-08 17:20:04 +08:00
5e02244e93 fix:table format 2025-05-08 17:06:00 +08:00
1a49d466fe fix:type 2025-05-08 16:44:08 +08:00
a9d69311a8 update readme 2025-03-27 13:45:00 +08:00
11517dad5c add match bench 2025-03-18 20:59:47 +08:00
5035233913 misc 2025-03-13 20:46:05 +08:00
2d467ebbef misc 2025-03-13 15:41:28 +08:00
ca1e8149a1 cmd config 2025-03-13 14:58:42 +08:00
1749a2c720 export library 2025-03-13 11:42:10 +08:00
bcdade9daa Revert "aligned scene"
This reverts commit e5cf60e2547db7e7b27d03080b16bf1db79807bf.
2025-03-13 09:21:37 +08:00
cfdb84093d Revert "aligned scene"
This reverts commit 31c51d77ba8c98ef501d204e2ceb1638cc0e6728.
2025-03-13 09:21:26 +08:00
31c51d77ba aligned scene 2025-03-13 09:10:12 +08:00
e5cf60e254 aligned scene 2025-03-12 23:12:02 +08:00
69226dd197 omp optim 2025-03-02 12:07:13 +08:00
d9a50a40d1 code clean 2025-03-01 18:12:02 +08:00
e6f440006b add readme 2025-02-27 20:58:08 +08:00
fa61990d3c add default image 2025-02-27 20:08:44 +08:00
b891c34176 add image 2025-02-27 20:05:42 +08:00
d119d45722 less step call 2025-02-27 09:39:04 +08:00
25f0b54a5a fix: crash unaligned load 2025-02-27 08:59:29 +08:00
4b26673655 opti dot product 2025-02-26 23:12:21 +08:00
f6ee887e20 misc 2025-02-26 17:56:50 +08:00
9bf943b8d1 uniform dot_product & unroll 2025-02-26 11:36:01 +08:00
3c3b0d4632 aligned model & less ptr call 2025-02-26 11:08:25 +08:00
d7afc907c2 misc 2025-02-26 10:10:08 +08:00
1b2286100e less ptr call 2025-02-26 10:05:18 +08:00
82320082fa static check: small fix 2025-02-26 08:44:11 +08:00
59bbc596bb clear 2025-02-25 22:20:56 +08:00
b041883bca enable openmp support 2025-02-25 17:54:27 +08:00
b4042ca7cf misc 2025-02-25 16:29:54 +08:00
c879408141 misc 2025-02-25 15:18:25 +08:00
d8d888c045 misc 2025-02-25 13:57:14 +08:00
a0f53bc38e misc 2025-02-25 11:08:29 +08:00
7346fc74b9 misc 2025-02-24 22:31:42 +08:00
93220b592e misc 2025-02-24 18:13:27 +08:00
4708a02218 init mask sum 2025-02-24 15:46:05 +08:00
88fdb73ae2 build config 2024-09-10 22:23:17 +08:00
a19993a747 misc 2024-09-03 11:23:37 +08:00
7d7cba4a4c rotation model finish 2024-09-03 09:34:41 +08:00
df05d073c7 misc 2024-09-02 18:12:59 +08:00
bf6fb79131 misc 2024-09-02 17:33:57 +08:00
18a73abd03 default 2024-09-02 14:21:56 +08:00
e54921809c misc 2024-09-02 11:53:42 +08:00
34e8f8bd67 misc 2024-09-02 10:33:14 +08:00
e0202a0fd9 misc 2024-08-28 18:14:29 +08:00
199ea46bb1 misc 2024-08-28 17:19:59 +08:00
f794aeb840 misc 2024-08-27 22:17:29 +08:00
1efe5f11ac misc 2024-08-27 17:56:17 +08:00
1081c707af misc 2024-08-26 17:28:35 +08:00
489d5428e3 add template rotation 2024-08-26 15:19:23 +08:00
dea266853c misc 2024-08-25 18:03:33 +08:00
11 changed files with 953 additions and 527 deletions

View File

@ -1,17 +1,58 @@
cmake_minimum_required(VERSION 3.12)
project(match)
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME}
main.cpp
option(ENABLE_OPENMP "enable openmp" OFF)
if(ENABLE_OPENMP)
# find OpenMP
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif(OPENMP_FOUND)
endif(ENABLE_OPENMP)
#==============================================================
#library
#==============================================================
add_library(algo SHARED
grayMatch.h
grayMatch.cpp
sum.h
sum.cpp
privateType.h
apiExport.h
)
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES})
target_compile_options(${PROJECT_NAME} PRIVATE
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
target_compile_options(algo PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
)
target_compile_definitions(algo PRIVATE API_EXPORTS
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
)
#==============================================================
#exe
#==============================================================
add_executable(${PROJECT_NAME}
main.cpp
)
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES} algo)
target_compile_options(${PROJECT_NAME} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
)
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")

13
README.md Normal file
View File

@ -0,0 +1,13 @@
# Template match with gray model(ncc)
## rotate-model vs main branch
| method | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
|-----------|----------|------------------|---------------------------|
| train | 1 | 680 | 1/680 |
| match | 31 | 16 | 2 |
| train-omp | 1 | 160 | 1/160 |
| match-omp | 12 | 6 | 2 |
Result: the main branch method trains the model very fast (1 ms) and is parameter free, while the rotate-model method is 2x faster in matching.

31
apiExport.h Normal file
View File

@ -0,0 +1,31 @@
#pragma once
// Symbol visibility helpers for building and consuming the shared library.
// API_EXPORT / API_IMPORT / API_LOCAL are selected per platform below;
// API_PUBLIC (at the bottom) is the macro meant to be put on declarations.

// Windows: use __declspec for export/import; API_LOCAL expands to nothing.
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
#define API_EXPORT __declspec(dllexport)
#define API_IMPORT __declspec(dllimport)
#define API_LOCAL
// Linux: export and import both map to default visibility, local to hidden.
#elif defined(linux) || defined(__linux) || defined(__linux__)
#define API_EXPORT __attribute__((visibility("default")))
#define API_IMPORT __attribute__((visibility("default")))
#define API_LOCAL __attribute__((visibility("hidden")))
// Apple platforms: same visibility attributes as the Linux branch.
#elif defined(__APPLE__)
#define API_EXPORT __attribute__((visibility("default")))
#define API_IMPORT __attribute__((visibility("default")))
#define API_LOCAL __attribute__((visibility("hidden")))
// Any other platform: all three macros expand to nothing.
#else
#define API_EXPORT
#define API_IMPORT
#define API_LOCAL
#endif
// API_DEMANGLED gives C linkage when compiled as C++ and is empty when compiled as C.
#ifdef __cplusplus
#define API_DEMANGLED extern "C"
#else
#define API_DEMANGLED
#endif
// API_PUBLIC = C linkage + export when API_EXPORTS is defined, C linkage + import otherwise.
#ifdef API_EXPORTS
#define API_PUBLIC API_DEMANGLED API_EXPORT
#else
#define API_PUBLIC API_DEMANGLED API_IMPORT
#endif

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,8 @@
#include <opencv2/opencv.hpp>
#include "apiExport.h"
struct Model;
struct Pose {
@ -12,14 +14,11 @@ struct Pose {
float score;
};
Model *trainModel(const cv::Mat &src, int level);
API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
double angleStep);
std::vector<Pose> matchModel(const cv::Mat &dst, const Model *model, int level, double startAngle,
double spanAngle, double maxOverlap, double minScore, int maxCount,
int subpixel);
void serialize(Model *model, int &size, uint8_t *buffer);
Model *deserialize(int size, uint8_t *buffer);
API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
int level, double startAngle, double spanAngle, double maxOverlap,
double minScore, int subpixel);
#endif // GRAY_MATCH_H

BIN
img/model3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

BIN
img/model3_src2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 336 KiB

View File

@ -1,42 +1,93 @@
#include "grayMatch.h"
#include <iostream>
#include <opencv2/core/utility.hpp>
#include <opencv2/opencv.hpp>
#include <string>
int main() {
auto src =
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3.bmp", cv::IMREAD_GRAYSCALE);
auto dst =
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3_src1.bmp", cv::IMREAD_GRAYSCALE);
int main(int argc, const char *argv[]) {
const std::string keys = "{model m || model image}"
"{scene s || scene image}"
"{view v || view result}"
"{threshold t | 0.7 | match minium score}"
"{bench b || match benchmark}"
"{help h || print this help}";
cv::CommandLineParser cmd(argc, argv, keys);
if (!cmd.check()) {
cmd.printErrors();
return -1;
}
if (cmd.has("help")) {
cmd.printMessage();
return 0;
}
auto srcFile = std::string(IMG_DIR) + "/model3.png";
auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
if (cmd.has("model"))
srcFile = cmd.get<std::string>("model");
if (cmd.has("scene"))
dstFile = cmd.get<std::string>("scene");
auto src = cv::imread(srcFile, cv::IMREAD_GRAYSCALE);
auto dst = cv::imread(dstFile, cv::IMREAD_GRAYSCALE);
if (src.empty() || dst.empty()) {
return -1;
}
int count = 70;
std::vector<Pose> poses(count);
auto score = cmd.get<float>("threshold");
auto t0 = cv::getTickCount();
auto model = trainModel(src, -1);
auto model = trainModel(src, -1, 0, 360, -1);
auto t1 = cv::getTickCount();
auto poses = matchModel(dst, model, -1, 0, 360, 0, 0.5, 70, 1);
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
auto t2 = cv::getTickCount();
auto trainCost = double(t1 - t0) / cv::getTickFrequency();
auto matchCost = double(t2 - t1) / cv::getTickFrequency();
const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
for (int i = 0; i < count; i++) {
const auto &pose = poses[ i ];
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
}
if (cmd.has("bench")) {
constexpr int times = 100;
auto start = cv::getTickCount();
for (int i = 0; i < times; i++) {
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
count = 70;
}
auto end = cv::getTickCount();
const auto cost = static_cast<double>(end - start) / cv::getTickFrequency() / times;
std::cout << "match bench avg(s):" << cost << std::endl;
}
if (cmd.has("view")) {
cv::Mat color;
cv::cvtColor(dst, color, cv::COLOR_GRAY2RGB);
for (auto &pose : poses) {
for (int i = 0; i < count; i++) {
const auto &pose = poses[ i ];
cv::RotatedRect rect(cv::Point2f(pose.x, pose.y), src.size(), -pose.angle);
std::vector<cv::Point2f> pts;
cv::Point2f pts[ 4 ];
rect.points(pts);
cv::line(color, pts[ 0 ], pts[ 1 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
cv::line(color, pts[ 1 ], pts[ 2 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
cv::line(color, pts[ 2 ], pts[ 3 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
cv::line(color, pts[ 3 ], pts[ 0 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
}
cv::imshow("img", color);
cv::waitKey();
}
return 0;
}

50
privateType.h Normal file
View File

@ -0,0 +1,50 @@
#pragma once
#include <opencv2/core.hpp>
/// One horizontal run of a run-length encoded region: `length` consecutive
/// pixels on row `row`, beginning at column `startColumn`.
/// A default-constructed run has row/startColumn of -1 and a length of 0.
struct HRLE {
    int row{-1};
    int startColumn{-1};
    int length{0};
};
/// One vertical run of a run-length encoded region: `length` consecutive
/// pixels in column `col`, beginning at row `startRow`.
/// A default-constructed run has col/startRow of -1 and a length of 0.
struct VRLE {
    int col{-1};
    int startRow{-1};
    int length{0};
};
// A region described as a list of horizontal runs (one HRLE per run).
using HRegion = std::vector<HRLE>;
// The same idea expressed as a list of vertical runs (one VRLE per run).
using VRegion = std::vector<VRLE>;
/// Data kept for a single template instance.
/// NOTE(review): the code that fills these fields is not visible here; the
/// per-field notes below are assumptions to confirm against grayMatch.cpp.
struct Template {
    cv::Mat img;          // template pixels
    HRegion hRegion;      // region as horizontal runs
    VRegion vRegion;      // region as vertical runs
    cv::RotatedRect rect; // presumably the rotated bounding rectangle - TODO confirm
    double mean{0.0};     // presumably the mean gray value inside the region - TODO confirm
    double normal{0.0};   // presumably the normalisation term used for scoring - TODO confirm
    double invArea{0.0};  // presumably 1 / (pixel count of the region) - TODO confirm
};
/// A group of templates that share one angle step.
/// NOTE(review): presumably one pyramid level holding one template per
/// sampled rotation - confirm against the training code.
struct Layer {
    double angleStep{0.0};
    std::vector<Template> templates;
};
/// Trained model: an angle range/step, the source image size and a list of layers.
/// NOTE(review): angle units (degrees vs. radians) and the ordering of
/// `layers` are not visible from this header - confirm against trainModel.
struct Model {
    double startAngle{0.0};
    double stopAngle{0.0};
    double angleStep{0.0};
    cv::Size srcSize;
    std::vector<Layer> layers;
};
// simdSize(type): number of lanes of an OpenCV universal-intrinsic vector type.
// OpenCV >= 4.8 is queried through cv::VTraits; older releases through the
// type's own nlanes constant. The version test compares (major, minor) as a
// pair so that 5.x releases (minor restarting at 0) also take the VTraits path.
#if (CV_VERSION_MAJOR > 4) || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR >= 8)
#define simdSize(type) cv::VTraits<type>::nlanes
#else
#define simdSize(type) type::nlanes
#endif

240
sum.cpp Normal file
View File

@ -0,0 +1,240 @@
#include "sum.h"
#include <opencv2/core/hal/intrin.hpp>
// Widens the eight 16-bit lanes to 32 bits and adds the lower four lanes to
// the upper four, returning four 32-bit partial sums.
inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) {
    return cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
}
// Widens the four 32-bit lanes to 64 bits and adds the lower two lanes to
// the upper two, returning two 64-bit partial sums.
inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) {
    return cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
}
// Accumulates 16 pixels into the running vector totals:
//   sum   += pixel values (widened 8 -> 16 -> 32 bits),
//   sqSum += squared pixel values (32-bit dot product widened to 64 bits).
inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) {
    // Fold the two 8-lane halves together in 16 bits (max 255 + 255), then widen.
    const cv::v_uint16x8 pairSums = cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
    sum = cv::v_add(sum, v_add_expand(pairSums));

    const cv::v_uint32x4 squares = cv::v_dotprod_expand_fast(src, src);
    sqSum = cv::v_add(sqSum, v_add_expand(squares));
}
// Computes the sum and the sum of squares of all pixels of `src` covered by
// the horizontal runs in `hRegion`.
//
// src      image read byte-wise through src.data / src.step
//          (assumes an 8-bit single-channel image - TODO confirm at call sites)
// hRegion  runs to visit; every run must lie inside `src`
// sum      [out] sum of the covered pixel values
// sqSum    [out] sum of the squared covered pixel values
void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) {
    // The stride must match the 16-lane vectors that cv::v_load(const uchar *)
    // produces. cv::v_uint8 is the widest register type of the build (32 or 64
    // lanes with AVX2/AVX-512), which would make the loop skip pixels.
    constexpr int blockSize = simdSize(cv::v_uint8x16);
    const auto *srcPtr = src.data;
    cv::v_uint32x4 vSum = cv::v_setzero_u32();
    cv::v_uint64x2 vSqSum = cv::v_setzero_u64();
    uint64 partSum = 0;
    uint64 partSqSum = 0;
    for (const auto &rle : hRegion) {
        const auto *ptr = srcPtr + src.step * rle.row + rle.startColumn;
        int i = 0;
        for (; i < rle.length - blockSize; i += blockSize) {
            computeSum(cv::v_load(ptr + i), vSum, vSqSum);
        }

        // TODO aligned fill 0
        // Scalar tail for the pixels the vector loop did not consume.
        for (; i < rle.length; i++) {
            const auto val = ptr[ i ];
            partSum += val;
            partSqSum += static_cast<ushort>(val) * static_cast<ushort>(val);
        }
    }

    // Widen before adding so the final total cannot wrap at 32 bits.
    sum = static_cast<uint64>(cv::v_reduce_sum(vSum)) + partSum;
    sqSum = cv::v_reduce_sum(vSqSum) + partSqSum;
}
// Adds the per-lane difference (end - start) of eight widened pixels to the
// running 32-bit accumulators: lanes 0..3 go to diff0, lanes 4..7 to diff1.
inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) {
    // Values come from 8-bit pixels, so the signed 16-bit subtraction cannot saturate.
    const cv::v_int16x8 delta =
        cv::v_sub(cv::v_reinterpret_as_s16(end), cv::v_reinterpret_as_s16(start));
    diff0 = cv::v_add(diff0, cv::v_expand_low(delta));
    diff1 = cv::v_add(diff1, cv::v_expand_high(delta));
}
// Adds the per-lane difference (end - start) of sixteen pixels to four 32-bit
// accumulators: diff0..diff3 receive lanes 0..3, 4..7, 8..11 and 12..15.
inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                           cv::v_int32x4 &diff3) {
    const cv::v_uint16x8 startLo = cv::v_expand_low(start);
    const cv::v_uint16x8 endLo = cv::v_expand_low(end);
    computeSumDiff(startLo, endLo, diff0, diff1);

    const cv::v_uint16x8 startHi = cv::v_expand_high(start);
    const cv::v_uint16x8 endHi = cv::v_expand_high(end);
    computeSumDiff(startHi, endHi, diff2, diff3);
}
// Adds (end - start), taken lane by lane on already squared 32-bit values,
// to the running accumulator diff0.
inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
                             cv::v_int32x4 &diff0) {
    const cv::v_int32x4 delta =
        cv::v_sub(cv::v_reinterpret_as_s32(end), cv::v_reinterpret_as_s32(start));
    diff0 = cv::v_add(diff0, delta);
}
// Squares eight widened pixels of `start` and `end` IN PLACE (both arguments
// are overwritten with their squares), then adds end^2 - start^2 to the
// accumulators: lanes 0..3 go to diff0, lanes 4..7 to diff1.
inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
                             cv::v_int32x4 &diff1) {
    // 255 * 255 = 65025 still fits in an unsigned 16-bit lane.
    start = cv::v_mul(start, start);
    end = cv::v_mul(end, end);

    const cv::v_uint32x4 startLo = cv::v_expand_low(start);
    const cv::v_uint32x4 endLo = cv::v_expand_low(end);
    computeSqSumDiff(startLo, endLo, diff0);

    const cv::v_uint32x4 startHi = cv::v_expand_high(start);
    const cv::v_uint32x4 endHi = cv::v_expand_high(end);
    computeSqSumDiff(startHi, endHi, diff1);
}
// Adds end^2 - start^2 for sixteen pixels to four 32-bit accumulators:
// diff0..diff3 receive lanes 0..3, 4..7, 8..11 and 12..15.
inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                             cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                             cv::v_int32x4 &diff3) {
    // The 16-bit overload squares its arguments in place, so it needs mutable copies.
    {
        cv::v_uint16x8 startLo = cv::v_expand_low(start);
        cv::v_uint16x8 endLo = cv::v_expand_low(end);
        computeSqSumDiff(startLo, endLo, diff0, diff1);
    }
    {
        cv::v_uint16x8 startHi = cv::v_expand_high(start);
        cv::v_uint16x8 endHi = cv::v_expand_high(end);
        computeSqSumDiff(startHi, endHi, diff2, diff3);
    }
}
// Writes a running total into ptr[0..3]: starting from the value already
// stored at ptr[-1], each output is the previous output plus the matching
// entry of `val`. ptr[-1] must therefore be valid and initialised.
inline void v_expand_store(double *ptr, const std::array<int, 4> &val) {
    double running = ptr[ -1 ];
    for (const int delta : val) {
        running += delta;
        *ptr++ = running;
    }
}
void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) {
constexpr auto blockSize = simdSize(cv::v_uint8);
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
std::array<int, 4> buf{};
int i = 1;
for (; i < sumWidth - blockSize; i += blockSize) {
cv::v_int32x4 diff0 = cv::v_setzero_s32();
cv::v_int32x4 diff1 = cv::v_setzero_s32();
cv::v_int32x4 diff2 = cv::v_setzero_s32();
cv::v_int32x4 diff3 = cv::v_setzero_s32();
cv::v_int32x4 diff10 = cv::v_setzero_s32();
cv::v_int32x4 diff11 = cv::v_setzero_s32();
cv::v_int32x4 diff12 = cv::v_setzero_s32();
cv::v_int32x4 diff13 = cv::v_setzero_s32();
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
auto vStart = cv::v_load(startPtr);
auto vEnd = cv::v_load(endPtr);
computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
}
auto *sumPtrStart = sumPtr + i;
cv::v_store(buf.data(), diff0);
v_expand_store(sumPtrStart, buf);
cv::v_store(buf.data(), diff1);
v_expand_store(sumPtrStart + 4, buf);
cv::v_store(buf.data(), diff2);
v_expand_store(sumPtrStart + 8, buf);
cv::v_store(buf.data(), diff3);
v_expand_store(sumPtrStart + 12, buf);
auto *sqSumPtrStart = sqSumPtr + i;
cv::v_store(buf.data(), diff10);
v_expand_store(sqSumPtrStart, buf);
cv::v_store(buf.data(), diff11);
v_expand_store(sqSumPtrStart + 4, buf);
cv::v_store(buf.data(), diff12);
v_expand_store(sqSumPtrStart + 8, buf);
cv::v_store(buf.data(), diff13);
v_expand_store(sqSumPtrStart + 12, buf);
}
for (; i < sumWidth; i++) {
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : hRegion) {
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
auto *endPtr = startPtr + rle.length;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
auto *sumPtrStart = sumPtr + i;
sumPtrStart[ 0 ] = sumPtrStart[ -1 ] + partSum;
auto *sqSumPtrStart = sqSumPtr + i;
sqSumPtrStart[ 0 ] = sqSumPtrStart[ -1 ] + partSqSum;
}
}
void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
std::size_t sumStep, double *sqSum, std::size_t sqSumStep) {
auto *srcPtr = src;
auto *sumPtr = sum + row * sumStep;
auto *sqSumPtr = sqSum + row * sqSumStep;
int32_t partSum = 0;
int32_t partSqSum = 0;
for (const auto &rle : vRegion) {
auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col;
auto *endPtr = startPtr + rle.length * srcStep;
const int32_t start = *startPtr;
const int32_t end = *endPtr;
partSum += end - start;
partSqSum += end * end - start * start;
}
sumPtr[ 0 ] = *(sumPtr - sumStep) + partSum;
sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
}
// Builds, for every placement of the template window inside `src`, the sum and
// the sum of squares of the pixels covered by the region.
//
// src           source image, read byte-wise through src.data / src.step
//               (assumes an 8-bit single-channel image - TODO confirm at call sites)
// sum           [out] CV_64FC1 table of size (src.size() - templateSize + 1)
// sqSum         [out] CV_64FC1 table of the same size holding squared sums
// templateSize  size of the template window
// hRegion       region as horizontal runs (used for the first cell and for row sweeps)
// vRegion       the same region as vertical runs (used to step down one row)
//
// Throws cv::Exception (via CV_Assert) when the template does not fit into `src`.
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
                 const HRegion &hRegion, const VRegion &vRegion) {
    const auto size = src.size() - templateSize + cv::Size(1, 1);
    // A template larger than the source gives an empty or negative table size;
    // without this check the first-cell write below would go through a null pointer.
    CV_Assert(size.width > 0 && size.height > 0);

    sum.create(size, CV_64FC1);
    sqSum.create(size, CV_64FC1);

    const auto *srcPtr = src.data;
    auto *sumPtr = reinterpret_cast<double *>(sum.data);
    auto *sqSumPtr = reinterpret_cast<double *>(sqSum.data);
    const auto sumStep = sum.step1();
    const auto sqSumStep = sqSum.step1();

    // compute first: the top-left cell is evaluated directly over the whole region
    uint64 sum0 = 0;
    uint64 sqSum0 = 0;
    computeSum(src, hRegion, sum0, sqSum0);
    sumPtr[ 0 ] = static_cast<double>(sum0);
    sqSumPtr[ 0 ] = static_cast<double>(sqSum0);

    // Every other cell is derived incrementally: sweep right along the row,
    // then seed column 0 of the next row from the cell above it.
    for (int y = 0; y < size.height; y++) {
        shiftH(srcPtr, src.step, hRegion, y, sumPtr, sumStep, sum.cols, sqSumPtr, sqSumStep);

        if (y + 1 < size.height) {
            shiftV(srcPtr, src.step, vRegion, y + 1, sumPtr, sumStep, sqSumPtr, sqSumStep);
        }
    }
}

8
sum.h Normal file
View File

@ -0,0 +1,8 @@
#pragma once
#include "privateType.h"
#include <opencv2/opencv.hpp>
// Fills `sum` and `sqSum` (both created as CV_64FC1 with size
// src.size() - templateSize + 1) with the sum and the sum of squares of the
// source pixels covered by the region, for every placement of the template
// window inside `src`. `hRegion` and `vRegion` describe that region as
// horizontal and vertical runs respectively.
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
const HRegion &hRegion, const VRegion &vRegion);