Compare commits
52 Commits
v1.0
...
feature-ro
Author | SHA1 | Date | |
---|---|---|---|
ad95fac40c | |||
699169140a | |||
5e02244e93 | |||
1a49d466fe | |||
a9d69311a8 | |||
11517dad5c | |||
5035233913 | |||
2d467ebbef | |||
ca1e8149a1 | |||
1749a2c720 | |||
bcdade9daa | |||
cfdb84093d | |||
31c51d77ba | |||
e5cf60e254 | |||
69226dd197 | |||
d9a50a40d1 | |||
e6f440006b | |||
fa61990d3c | |||
b891c34176 | |||
d119d45722 | |||
25f0b54a5a | |||
4b26673655 | |||
f6ee887e20 | |||
9bf943b8d1 | |||
3c3b0d4632 | |||
d7afc907c2 | |||
1b2286100e | |||
82320082fa | |||
59bbc596bb | |||
b041883bca | |||
b4042ca7cf | |||
c879408141 | |||
d8d888c045 | |||
a0f53bc38e | |||
7346fc74b9 | |||
93220b592e | |||
4708a02218 | |||
88fdb73ae2 | |||
a19993a747 | |||
7d7cba4a4c | |||
df05d073c7 | |||
bf6fb79131 | |||
18a73abd03 | |||
e54921809c | |||
34e8f8bd67 | |||
e0202a0fd9 | |||
199ea46bb1 | |||
f794aeb840 | |||
1efe5f11ac | |||
1081c707af | |||
489d5428e3 | |||
dea266853c |
@ -1,17 +1,58 @@
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
project(match)
|
||||
|
||||
find_package(OpenCV REQUIRED)
|
||||
add_executable(${PROJECT_NAME}
|
||||
main.cpp
|
||||
|
||||
option(ENABLE_OPENMP "enable openmp" OFF)
|
||||
if(ENABLE_OPENMP)
|
||||
# find OpenMP
|
||||
find_package(OpenMP REQUIRED)
|
||||
|
||||
if(OPENMP_FOUND)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
|
||||
endif(OPENMP_FOUND)
|
||||
endif(ENABLE_OPENMP)
|
||||
|
||||
#==============================================================
|
||||
#library
|
||||
#==============================================================
|
||||
add_library(algo SHARED
|
||||
grayMatch.h
|
||||
grayMatch.cpp
|
||||
sum.h
|
||||
sum.cpp
|
||||
privateType.h
|
||||
apiExport.h
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
|
||||
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES})
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
|
||||
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
|
||||
target_compile_options(algo PRIVATE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
|
||||
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
|
||||
)
|
||||
target_compile_definitions(algo PRIVATE API_EXPORTS
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
|
||||
)
|
||||
|
||||
#==============================================================
|
||||
#exe
|
||||
#==============================================================
|
||||
add_executable(${PROJECT_NAME}
|
||||
main.cpp
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
|
||||
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES} algo)
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
|
||||
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
|
||||
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
|
||||
)
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
|
||||
|
||||
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
13
README.md
Normal file
13
README.md
Normal file
@ -0,0 +1,13 @@
|
||||
# Template matching with a gray model (NCC)
|
||||
|
||||
## rotate-model vs main branch
|
||||
|
||||
|
||||
| method | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
|
||||
|-----------|----------|------------------|---------------------------|
|
||||
| train | 1 | 680 | 1/680 |
|
||||
| match | 31 | 16 | 2 |
|
||||
| train-omp | 1 | 160 | 1/160 |
|
||||
| match-omp | 12 | 6 | 2 |
|
||||
|
||||
Result: the main-branch method trains the model very quickly (1 ms) and is parameter-free, while the rotate-model method is about 2x faster at matching.
|
31
apiExport.h
Normal file
31
apiExport.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
// Symbol-visibility decorations, chosen per target platform:
//   API_EXPORT - applied when building the shared library,
//   API_IMPORT - applied when consuming the shared library,
//   API_LOCAL  - keeps a symbol internal where the toolchain supports it.
// Unknown platforms get empty definitions so the macros can always be used.
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
#    define API_EXPORT __declspec(dllexport)
#    define API_IMPORT __declspec(dllimport)
#    define API_LOCAL
#elif defined(linux) || defined(__linux) || defined(__linux__) || defined(__APPLE__)
// Linux and Apple targets share the same visibility attributes.
#    define API_EXPORT __attribute__((visibility("default")))
#    define API_IMPORT __attribute__((visibility("default")))
#    define API_LOCAL __attribute__((visibility("hidden")))
#else
#    define API_EXPORT
#    define API_IMPORT
#    define API_LOCAL
#endif
|
||||
|
||||
// API_DEMANGLED requests C linkage when the header is compiled as C++;
// for a C compiler it expands to nothing.
#if !defined(__cplusplus)
#    define API_DEMANGLED
#else
#    define API_DEMANGLED extern "C"
#endif
|
||||
|
||||
// API_PUBLIC decorates every public entry point: the linkage specifier from
// API_DEMANGLED followed by API_EXPORT when API_EXPORTS is defined, and by
// API_IMPORT otherwise.
#if !defined(API_EXPORTS)
#    define API_PUBLIC API_DEMANGLED API_IMPORT
#else
#    define API_PUBLIC API_DEMANGLED API_EXPORT
#endif
|
967
grayMatch.cpp
967
grayMatch.cpp
File diff suppressed because it is too large
Load Diff
15
grayMatch.h
15
grayMatch.h
@ -3,6 +3,8 @@
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
#include "apiExport.h"
|
||||
|
||||
struct Model;
|
||||
|
||||
struct Pose {
|
||||
@ -12,14 +14,11 @@ struct Pose {
|
||||
float score;
|
||||
};
|
||||
|
||||
Model *trainModel(const cv::Mat &src, int level);
|
||||
API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
|
||||
double angleStep);
|
||||
|
||||
std::vector<Pose> matchModel(const cv::Mat &dst, const Model *model, int level, double startAngle,
|
||||
double spanAngle, double maxOverlap, double minScore, int maxCount,
|
||||
int subpixel);
|
||||
|
||||
void serialize(Model *model, int &size, uint8_t *buffer);
|
||||
|
||||
Model *deserialize(int size, uint8_t *buffer);
|
||||
API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
|
||||
int level, double startAngle, double spanAngle, double maxOverlap,
|
||||
double minScore, int subpixel);
|
||||
|
||||
#endif // GRAY_MATCH_H
|
BIN
img/model3.png
Normal file
BIN
img/model3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
img/model3_src2.png
Normal file
BIN
img/model3_src2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 336 KiB |
103
main.cpp
103
main.cpp
@ -1,42 +1,93 @@
|
||||
#include "grayMatch.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <string>
|
||||
|
||||
int main() {
|
||||
auto src =
|
||||
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3.bmp", cv::IMREAD_GRAYSCALE);
|
||||
auto dst =
|
||||
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3_src1.bmp", cv::IMREAD_GRAYSCALE);
|
||||
int main(int argc, const char *argv[]) {
|
||||
const std::string keys = "{model m || model image}"
|
||||
"{scene s || scene image}"
|
||||
"{view v || view result}"
|
||||
"{threshold t | 0.7 | match minium score}"
|
||||
"{bench b || match benchmark}"
|
||||
"{help h || print this help}";
|
||||
|
||||
cv::CommandLineParser cmd(argc, argv, keys);
|
||||
if (!cmd.check()) {
|
||||
cmd.printErrors();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (cmd.has("help")) {
|
||||
cmd.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto srcFile = std::string(IMG_DIR) + "/model3.png";
|
||||
auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
|
||||
if (cmd.has("model"))
|
||||
srcFile = cmd.get<std::string>("model");
|
||||
if (cmd.has("scene"))
|
||||
dstFile = cmd.get<std::string>("scene");
|
||||
|
||||
auto src = cv::imread(srcFile, cv::IMREAD_GRAYSCALE);
|
||||
auto dst = cv::imread(dstFile, cv::IMREAD_GRAYSCALE);
|
||||
if (src.empty() || dst.empty()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int count = 70;
|
||||
std::vector<Pose> poses(count);
|
||||
auto score = cmd.get<float>("threshold");
|
||||
|
||||
auto t0 = cv::getTickCount();
|
||||
auto model = trainModel(src, -1);
|
||||
auto model = trainModel(src, -1, 0, 360, -1);
|
||||
auto t1 = cv::getTickCount();
|
||||
auto poses = matchModel(dst, model, -1, 0, 360, 0, 0.5, 70, 1);
|
||||
auto t2 = cv::getTickCount();
|
||||
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
auto t2 = cv::getTickCount();
|
||||
|
||||
auto trainCost = double(t1 - t0) / cv::getTickFrequency();
|
||||
auto matchCost = double(t2 - t1) / cv::getTickFrequency();
|
||||
const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
|
||||
const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
|
||||
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
|
||||
|
||||
cv::Mat color;
|
||||
cv::cvtColor(dst, color, cv::COLOR_GRAY2RGB);
|
||||
for (auto &pose : poses) {
|
||||
cv::RotatedRect rect(cv::Point2f(pose.x, pose.y), src.size(), -pose.angle);
|
||||
|
||||
std::vector<cv::Point2f> pts;
|
||||
rect.points(pts);
|
||||
|
||||
cv::line(color, pts[ 0 ], pts[ 1 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 1 ], pts[ 2 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 2 ], pts[ 3 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 3 ], pts[ 0 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
const auto &pose = poses[ i ];
|
||||
std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
|
||||
}
|
||||
|
||||
cv::imshow("img", color);
|
||||
cv::waitKey();
|
||||
if (cmd.has("bench")) {
|
||||
constexpr int times = 100;
|
||||
|
||||
auto start = cv::getTickCount();
|
||||
for (int i = 0; i < times; i++) {
|
||||
matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
|
||||
count = 70;
|
||||
}
|
||||
auto end = cv::getTickCount();
|
||||
|
||||
const auto cost = static_cast<double>(end - start) / cv::getTickFrequency() / times;
|
||||
std::cout << "match bench avg(s):" << cost << std::endl;
|
||||
}
|
||||
|
||||
if (cmd.has("view")) {
|
||||
cv::Mat color;
|
||||
cv::cvtColor(dst, color, cv::COLOR_GRAY2RGB);
|
||||
for (int i = 0; i < count; i++) {
|
||||
const auto &pose = poses[ i ];
|
||||
cv::RotatedRect rect(cv::Point2f(pose.x, pose.y), src.size(), -pose.angle);
|
||||
|
||||
cv::Point2f pts[ 4 ];
|
||||
rect.points(pts);
|
||||
|
||||
cv::line(color, pts[ 0 ], pts[ 1 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 1 ], pts[ 2 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 2 ], pts[ 3 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
cv::line(color, pts[ 3 ], pts[ 0 ], cv::Scalar(255, 0, 0), 1, cv::LINE_AA);
|
||||
}
|
||||
|
||||
cv::imshow("img", color);
|
||||
cv::waitKey();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
50
privateType.h
Normal file
50
privateType.h
Normal file
@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
// Horizontal run: `length` consecutive pixels on row `row`, beginning at
// column `startColumn`. A default-constructed run is empty, with invalid
// (-1) coordinates.
struct HRLE {
    int row{-1};
    int startColumn{-1};
    int length{0};
};
|
||||
|
||||
// Vertical run: `length` consecutive pixels in column `col`, beginning at
// row `startRow`. A default-constructed run is empty, with invalid (-1)
// coordinates.
struct VRLE {
    int col{-1};
    int startRow{-1};
    int length{0};
};
|
||||
|
||||
// A region expressed as a list of horizontal runs (HRLE entries).
using HRegion = std::vector<HRLE>;
|
||||
// A region expressed as a list of vertical runs (VRLE entries).
using VRegion = std::vector<VRLE>;
|
||||
|
||||
struct Template {
|
||||
cv::Mat img;
|
||||
HRegion hRegion;
|
||||
VRegion vRegion;
|
||||
cv::RotatedRect rect;
|
||||
|
||||
double mean = 0;
|
||||
double normal = 0;
|
||||
double invArea = 0;
|
||||
};
|
||||
|
||||
struct Layer {
|
||||
double angleStep = 0;
|
||||
|
||||
std::vector<Template> templates;
|
||||
};
|
||||
|
||||
struct Model {
|
||||
double startAngle = 0;
|
||||
double stopAngle = 0;
|
||||
double angleStep = 0;
|
||||
|
||||
cv::Size srcSize;
|
||||
std::vector<Layer> layers;
|
||||
};
|
||||
|
||||
// simdSize(type): lane count of an OpenCV universal-intrinsic vector type.
//
// From OpenCV 4.8 on, the lane count is read through cv::VTraits<>; older
// releases expose it as a static member of the vector type itself.
//
// The version is compared as a (major, minor) pair. The previous test
// `MAJOR >= 4 && MINOR >= 8` was false for 5.0 ... 5.7, which sent those
// newer releases down the legacy path.
#if (CV_VERSION_MAJOR > 4) || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR >= 8)
#define simdSize(type) cv::VTraits<type>::nlanes
#else
#define simdSize(type) type::nlanes
#endif
|
240
sum.cpp
Normal file
240
sum.cpp
Normal file
@ -0,0 +1,240 @@
|
||||
#include "sum.h"
|
||||
|
||||
#include <opencv2/core/hal/intrin.hpp>
|
||||
|
||||
// Widens the eight 16-bit lanes of `src` to 32 bits and adds the lower four
// widened lanes to the upper four, giving four 32-bit partial sums.
inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) {
    const cv::v_uint32x4 lowerHalf = cv::v_expand_low(src);
    const cv::v_uint32x4 upperHalf = cv::v_expand_high(src);
    return cv::v_add(lowerHalf, upperHalf);
}
|
||||
|
||||
// Widens the four 32-bit lanes of `src` to 64 bits and adds the lower two
// widened lanes to the upper two, giving two 64-bit partial sums.
inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) {
    const cv::v_uint64x2 lowerHalf = cv::v_expand_low(src);
    const cv::v_uint64x2 upperHalf = cv::v_expand_high(src);
    return cv::v_add(lowerHalf, upperHalf);
}
|
||||
|
||||
// Accumulates 16 pixels into running vector totals.
//   sum   += pixel values, reduced to four 32-bit partial sums
//   sqSum += squared pixel values, reduced to two 64-bit partial sums
// Only the totals over all lanes are meaningful; the lane order produced by
// the "fast" dot product is not relied upon.
inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) {
    // Plain sum: widen 8 -> 16 bits, fold the two halves, then fold again to 32 bits.
    const cv::v_uint16x8 pairSums = cv::v_add(cv::v_expand_low(src), cv::v_expand_high(src));
    sum = cv::v_add(sum, v_add_expand(pairSums));

    // Sum of squares: src . src yields 32-bit partial products, folded to 64 bits.
    const auto squares = cv::v_dotprod_expand_fast(src, src);
    sqSum = cv::v_add(sqSum, v_add_expand(squares));
}
|
||||
|
||||
// Computes the sum and the sum of squares of all pixels of `src` covered by
// the horizontal runs in `hRegion`.
//
// @param src      image whose bytes are read through src.data / src.step
//                 (assumes a single-channel 8-bit image - TODO confirm at caller)
// @param hRegion  runs (row, startColumn, length) addressed relative to the
//                 top-left corner of `src`
// @param sum      receives the sum of the covered pixel values
// @param sqSum    receives the sum of the squared covered pixel values
void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) {
    // cv::v_load() always returns a 128-bit cv::v_uint8x16, so the loop must
    // advance by that type's lane count. Using cv::v_uint8 (the widest
    // enabled register, e.g. 32 lanes with AVX2) would skip pixels.
    constexpr auto blockSize = simdSize(cv::v_uint8x16);
    const auto *srcPtr = src.data;
    cv::v_uint32x4 vSum = cv::v_setzero_u32();
    cv::v_uint64x2 vSqSum = cv::v_setzero_u64();
    uint32_t partSum = 0;
    uint64 partSqSum = 0;

    for (const auto &rle : hRegion) {
        const auto *ptr = srcPtr + src.step * rle.row + rle.startColumn;

        // Vectorised part: whole blocks that lie strictly inside the run.
        int i = 0;
        for (; i < rle.length - blockSize; i += blockSize) {
            computeSum(cv::v_load(ptr + i), vSum, vSqSum);
        }

        // Scalar tail for the remaining pixels of the run.
        // TODO aligned fill 0
        for (; i < rle.length; i++) {
            const auto val = ptr[ i ];
            partSum += val;
            partSqSum += static_cast<ushort>(val) * static_cast<ushort>(val);
        }
    }

    sum = cv::v_reduce_sum(vSum) + partSum;
    sqSum = cv::v_reduce_sum(vSqSum) + partSqSum;
}
|
||||
|
||||
// Adds the lane-wise difference (end - start) of eight 16-bit values to two
// 32-bit accumulators: lanes 0..3 go to diff0, lanes 4..7 to diff1.
// The inputs are reinterpreted as signed so the difference may be negative.
inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) {
    const cv::v_int16x8 delta =
        cv::v_sub(cv::v_reinterpret_as_s16(end), cv::v_reinterpret_as_s16(start));

    diff0 = cv::v_add(diff0, cv::v_expand_low(delta));
    diff1 = cv::v_add(diff1, cv::v_expand_high(delta));
}
|
||||
|
||||
// Adds the lane-wise difference (end - start) of sixteen 8-bit values to four
// 32-bit accumulators; diffN receives lanes 4*N .. 4*N+3.
inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                           cv::v_int32x4 &diff3) {
    // Lanes 0..7
    const cv::v_uint16x8 startLo = cv::v_expand_low(start);
    const cv::v_uint16x8 endLo = cv::v_expand_low(end);
    computeSumDiff(startLo, endLo, diff0, diff1);

    // Lanes 8..15
    const cv::v_uint16x8 startHi = cv::v_expand_high(start);
    const cv::v_uint16x8 endHi = cv::v_expand_high(end);
    computeSumDiff(startHi, endHi, diff2, diff3);
}
|
||||
|
||||
// Adds the lane-wise signed difference (end - start) of four 32-bit values
// to the accumulator diff0.
inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
                             cv::v_int32x4 &diff0) {
    const cv::v_int32x4 delta =
        cv::v_sub(cv::v_reinterpret_as_s32(end), cv::v_reinterpret_as_s32(start));
    diff0 = cv::v_add(diff0, delta);
}
|
||||
|
||||
// Squares eight 16-bit values of `start` and `end` and adds the difference of
// the squares (end^2 - start^2) to two 32-bit accumulators (lanes 0..3 to
// diff0, lanes 4..7 to diff1).
// Note: `start` and `end` are overwritten with their squares.
inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
                             cv::v_int32x4 &diff1) {
    // Square in place; the caller-visible vectors hold the squares afterwards.
    start = cv::v_mul(start, start);
    end = cv::v_mul(end, end);

    const cv::v_uint32x4 startLo = cv::v_expand_low(start);
    const cv::v_uint32x4 endLo = cv::v_expand_low(end);
    computeSqSumDiff(startLo, endLo, diff0);

    const cv::v_uint32x4 startHi = cv::v_expand_high(start);
    const cv::v_uint32x4 endHi = cv::v_expand_high(end);
    computeSqSumDiff(startHi, endHi, diff1);
}
|
||||
|
||||
// Adds the lane-wise difference of squares (end^2 - start^2) of sixteen 8-bit
// values to four 32-bit accumulators; diffN receives lanes 4*N .. 4*N+3.
inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
                             cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
                             cv::v_int32x4 &diff3) {
    {
        // Lanes 0..7 (the 16-bit helper squares its arguments in place).
        auto startLo = cv::v_expand_low(start);
        auto endLo = cv::v_expand_low(end);
        computeSqSumDiff(startLo, endLo, diff0, diff1);
    }
    {
        // Lanes 8..15
        auto startHi = cv::v_expand_high(start);
        auto endHi = cv::v_expand_high(end);
        computeSqSumDiff(startHi, endHi, diff2, diff3);
    }
}
|
||||
|
||||
// Writes a running total into ptr[0..3]: each output is the previous output
// plus the matching increment from `val`, seeded by ptr[-1].
// The caller must ensure ptr[-1] is readable and already holds the total
// that precedes ptr[0].
inline void v_expand_store(double *ptr, const std::array<int, 4> &val) {
    for (int k = 0; k < 4; ++k) {
        ptr[ k ] = ptr[ k - 1 ] + val[ static_cast<std::size_t>(k) ];
    }
}
|
||||
|
||||
void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
|
||||
std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) {
|
||||
constexpr auto blockSize = simdSize(cv::v_uint8);
|
||||
auto *srcPtr = src;
|
||||
auto *sumPtr = sum + row * sumStep;
|
||||
auto *sqSumPtr = sqSum + row * sqSumStep;
|
||||
|
||||
std::array<int, 4> buf{};
|
||||
|
||||
int i = 1;
|
||||
for (; i < sumWidth - blockSize; i += blockSize) {
|
||||
cv::v_int32x4 diff0 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff1 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff2 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff3 = cv::v_setzero_s32();
|
||||
|
||||
cv::v_int32x4 diff10 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff11 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff12 = cv::v_setzero_s32();
|
||||
cv::v_int32x4 diff13 = cv::v_setzero_s32();
|
||||
|
||||
for (const auto &rle : hRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
|
||||
auto *endPtr = startPtr + rle.length;
|
||||
|
||||
auto vStart = cv::v_load(startPtr);
|
||||
auto vEnd = cv::v_load(endPtr);
|
||||
computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
|
||||
computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
|
||||
}
|
||||
|
||||
auto *sumPtrStart = sumPtr + i;
|
||||
cv::v_store(buf.data(), diff0);
|
||||
v_expand_store(sumPtrStart, buf);
|
||||
cv::v_store(buf.data(), diff1);
|
||||
v_expand_store(sumPtrStart + 4, buf);
|
||||
cv::v_store(buf.data(), diff2);
|
||||
v_expand_store(sumPtrStart + 8, buf);
|
||||
cv::v_store(buf.data(), diff3);
|
||||
v_expand_store(sumPtrStart + 12, buf);
|
||||
|
||||
auto *sqSumPtrStart = sqSumPtr + i;
|
||||
cv::v_store(buf.data(), diff10);
|
||||
v_expand_store(sqSumPtrStart, buf);
|
||||
cv::v_store(buf.data(), diff11);
|
||||
v_expand_store(sqSumPtrStart + 4, buf);
|
||||
cv::v_store(buf.data(), diff12);
|
||||
v_expand_store(sqSumPtrStart + 8, buf);
|
||||
cv::v_store(buf.data(), diff13);
|
||||
v_expand_store(sqSumPtrStart + 12, buf);
|
||||
}
|
||||
|
||||
for (; i < sumWidth; i++) {
|
||||
int32_t partSum = 0;
|
||||
int32_t partSqSum = 0;
|
||||
for (const auto &rle : hRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
|
||||
auto *endPtr = startPtr + rle.length;
|
||||
|
||||
const int32_t start = *startPtr;
|
||||
const int32_t end = *endPtr;
|
||||
partSum += end - start;
|
||||
partSqSum += end * end - start * start;
|
||||
}
|
||||
|
||||
auto *sumPtrStart = sumPtr + i;
|
||||
sumPtrStart[ 0 ] = sumPtrStart[ -1 ] + partSum;
|
||||
auto *sqSumPtrStart = sqSumPtr + i;
|
||||
sqSumPtrStart[ 0 ] = sqSumPtrStart[ -1 ] + partSqSum;
|
||||
}
|
||||
}
|
||||
|
||||
void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
|
||||
std::size_t sumStep, double *sqSum, std::size_t sqSumStep) {
|
||||
auto *srcPtr = src;
|
||||
auto *sumPtr = sum + row * sumStep;
|
||||
auto *sqSumPtr = sqSum + row * sqSumStep;
|
||||
|
||||
int32_t partSum = 0;
|
||||
int32_t partSqSum = 0;
|
||||
for (const auto &rle : vRegion) {
|
||||
auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col;
|
||||
auto *endPtr = startPtr + rle.length * srcStep;
|
||||
|
||||
const int32_t start = *startPtr;
|
||||
const int32_t end = *endPtr;
|
||||
|
||||
partSum += end - start;
|
||||
partSqSum += end * end - start * start;
|
||||
}
|
||||
|
||||
sumPtr[ 0 ] = *(sumPtr - sumStep) + partSum;
|
||||
sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
|
||||
}
|
||||
|
||||
// Builds, for every placement of the region inside `src`, the sum and the sum
// of squares of the covered pixels.
//
// The result tables are (re)allocated as CV_64FC1 with size
// src.size() - templateSize + (1, 1). Entry (0, 0) is computed directly from
// `hRegion`; every other entry is derived incrementally: shiftH() walks along
// a row, shiftV() steps down to the first column of the next row.
//
// @param src           source image (its raw bytes are read via data/step)
// @param sum           output table of sums
// @param sqSum         output table of squared sums
// @param templateSize  size of the template window
// @param hRegion       region as horizontal runs
// @param vRegion       the same region as vertical runs
void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
                 const HRegion &hRegion, const VRegion &vRegion) {
    const auto tableSize = src.size() - templateSize + cv::Size(1, 1);
    sum.create(tableSize, CV_64FC1);
    sqSum.create(tableSize, CV_64FC1);

    const auto *pixels = src.data;
    auto *sumBase = reinterpret_cast<double *>(sum.data);
    auto *sqSumBase = reinterpret_cast<double *>(sqSum.data);
    const auto sumStride = sum.step1();
    const auto sqSumStride = sqSum.step1();

    // Seed: totals for the window at the top-left corner.
    uint64 firstSum;
    uint64 firstSqSum;
    computeSum(src, hRegion, firstSum, firstSqSum);
    sumBase[ 0 ] = static_cast<double>(firstSum);
    sqSumBase[ 0 ] = static_cast<double>(firstSqSum);

    for (int y = 0; y < tableSize.height; y++) {
        if (y > 0) {
            // Step down from the previous row to obtain column 0 of this row.
            shiftV(pixels, src.step, vRegion, y, sumBase, sumStride, sqSumBase, sqSumStride);
        }
        // Sweep the rest of the row from left to right.
        shiftH(pixels, src.step, hRegion, y, sumBase, sumStride, sum.cols, sqSumBase,
               sqSumStride);
    }
}
|
Reference in New Issue
Block a user