static check

fix: mismatch faction sign
add cmd
2025-05-08 17:56:22 +08:00 · 2025-03-27 13:43:37 +08:00 · 2025-03-25 17:56:02 +08:00 · 2025-03-10 09:10:32 +08:00 · 2025-03-02 12:47:11 +08:00 · 2025-02-27 21:04:58 +08:00
22 changed files with 1286 additions and 767 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.12)
-project(match)

-find_package(OpenCV REQUIRED)
+project(match)
+find_package(OpenCV 4.8 REQUIRED)

 option(ENABLE_OPENMP "enable openmp" OFF)
 if(ENABLE_OPENMP)
@ -21,24 +21,28 @@ endif(ENABLE_OPENMP)
 add_library(algo SHARED
    grayMatch.h
    grayMatch.cpp
-    sum.h
-    sum.cpp
+    serialize.cpp
    privateType.h
    apiExport.h
+    integral.h
+    integral.cpp
 )

 target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
 target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
 target_compile_options(algo PRIVATE
        $<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
-        $<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:-fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
+        $<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
        $<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
+        #$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Darwin>>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
        $<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
 )
+# -Wl,--exclude-libs,ALL is a linker option: compile options never reach the link line
+# (and clang rejects the unused argument under -Werror), so apply it when linking instead.
+# LINK_FLAGS is used because target_link_options() needs CMake 3.13 and this project
+# declares 3.12 as its minimum.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    set_property(TARGET algo APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--exclude-libs,ALL")
+endif()
-target_compile_definitions(algo PRIVATE  API_EXPORTS
+target_compile_definitions(algo PUBLIC API_EXPORTS
        $<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
 )

+message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
+
 #==============================================================
 #exe
 #==============================================================
@ -54,5 +58,3 @@ target_compile_options(${PROJECT_NAME} PRIVATE
        $<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
 )
 target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")
-
-message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
--- a/24
+++ b/24
@ -0,0 +1,24 @@
+BSD 2-Clause License
+
+Copyright (c) 2024, SurfaceMan
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/README.md
+++ b/README.md
@ -1,13 +1,20 @@
 # Template match with gray model(ncc)

-## rotate-model vs main branch
+## Note: the method on branch [feature-rotate-model](https://github.com/SurfaceMan/gray_match/tree/feature-rotate-model) is 2x faster at matching!
+
+## highlights:
+ 1. The original code is based on [Fastest_Image_Pattern_Matching](https://github.com/DennisLiu1993/Fastest_Image_Pattern_Matching); check out tag [v1.0](https://github.com/SurfaceMan/gray_match/releases/tag/v1.0) for more details.
+ 2. The SIMD match process was refactored with OpenCV [Universal intrinsics](https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html) and has been tested on x86_64 (SSE), ARM (NEON) and LoongArch (LSX).
+ 3. support model save/load as binary file
+ 4. provide pure c interface
+ 5. support openmp
+ 6. position with 3x3 subpixel interpolation
+ 
+## usage:
+all you need can be found in [main.cpp](main.cpp)
+
+## gallery:
+![sample](img/result.png)


-| method    | main(ms) | rotate-model(ms) | factor(main/rotate-model) |
-|-----------|----------|------------------|---------------------------|
-| train     | 1        | 680              | 1/680                     |     
-| match     | 31       | 16               | 2                         |
-| train-omp | 1        | 160              | 1/160                     |
-| match-omp | 12       | 6                | 2                         |
-
-result: main brain method train model really fast(1ms) and parameter free, rotate-model method 2x faster in matching.
+ 
--- a/TODO.md
+++ b/TODO.md
@ -0,0 +1 @@
+1. The OpenCV intrinsics API [changed](https://github.com/opencv/opencv/pull/24371) in 4.9; the SIMD code needs to be refactored to match.
--- a/grayMatch.cpp
+++ b/grayMatch.cpp
--- a/grayMatch.h
+++ b/grayMatch.h
@ -1,12 +1,12 @@
 #ifndef GRAY_MATCH_H
 #define GRAY_MATCH_H

-#include <opencv2/opencv.hpp>
-
 #include "apiExport.h"

 struct Model;

+using Model_t = Model *;
+
 struct Pose {
    float x;
    float y;
@ -14,11 +14,93 @@ struct Pose {
    float score;
 };

-API_PUBLIC Model *trainModel(const cv::Mat &src, int level, double startAngle, double spanAngle,
-                             double angleStep);
+/**
+ * @brief Train a match model from a rectangular region of an image.
+ * @param data pointer to the image pixels
+ * @param width image width
+ * @param height image height
+ * @param channels image channels 1(gray)/3(rgb)/4(rgba)
+ * @param bytesPerLine bytes per image line
+ * @param roiLeft left edge of the rectangular roi
+ * @param roiTop top edge of the rectangular roi
+ * @param roiWidth width of the rectangular roi
+ * @param roiHeight height of the rectangular roi
+ * @param levelNum number of pyramid levels (> 0: user setting, -1: auto)
+ * @return handle of the trained model (presumably released with freeModel; what is
+ *         returned on failure is decided by the implementation — TODO confirm)
+ */
+API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height, int channels,
+                              int bytesPerLine, int roiLeft, int roiTop, int roiWidth,
+                              int roiHeight, int levelNum);
+/**
+ * @brief Search an image region for instances of a trained model.
+ *        Nothing is returned directly; results come back through count and poses.
+ * @param data pointer to the image pixels
+ * @param width image width
+ * @param height image height
+ * @param channels image channels 1(gray)/3(rgb)/4(rgba)
+ * @param bytesPerLine bytes per image line
+ * @param roiLeft left edge of the rectangular roi
+ * @param roiTop top edge of the rectangular roi
+ * @param roiWidth width of the rectangular roi
+ * @param roiHeight height of the rectangular roi
+ * @param model trained model
+ * @param count in: maximum number of matches to detect / out: number of matches found
+ * @param poses caller-allocated pose array holding at least count elements
+ * @param level pyramid level the match starts at (level>=0 && level<modelLevel-1, -1: auto)
+ * @param startAngle rotation start angle
+ * @param spanAngle rotation angle range
+ * @param maxOverlap overlap threshold
+ * @param minScore minimum score a match must reach
+ * @param subpixel non-zero to compute subpixel results (presumably; passed as 1 by main.cpp)
+ */
+API_PUBLIC void matchModel(const unsigned char *data, int width, int height, int channels,
+                           int bytesPerLine, int roiLeft, int roiTop, int roiWidth, int roiHeight,
+                           Model_t model, int *count, Pose *poses, int level, double startAngle,
+                           double spanAngle, double maxOverlap, double minScore, int subpixel);

-API_PUBLIC void matchModel(const cv::Mat &dst, const Model *model, int *count, Pose *poses,
-                           int level, double startAngle, double spanAngle, double maxOverlap,
-                           double minScore, int subpixel);
+/**
+ * @brief Get the number of pyramid levels of a trained model.
+ * @param model trained model
+ * @return pyramid level count
+ */
+API_PUBLIC int modelLevel(Model_t model);

-#endif // GRAY_MATCH_H
+/**
+ * @brief Get the image a trained model stores for one pyramid level.
+ *        Results come back through data/width/height/channels; nothing is returned directly.
+ * @param model trained model
+ * @param level pyramid level index (level>=0 && level<modelLevel-1)
+ * @param data caller-allocated image buffer; pass nullptr to only query
+ *             width/height/channels
+ * @param length size of the data buffer, not less than width*height*channels
+ * @param width out: image width, may be nullptr
+ * @param height out: image height, may be nullptr
+ * @param channels out: image channels, may be nullptr
+ */
+API_PUBLIC void modelImage(Model_t model, int level, unsigned char *data, int length, int *width,
+                           int *height, int *channels);
+
+/**
+ * @brief Free a model.
+ * @param model address of the model handle to free (taken by pointer, presumably so the
+ *              handle itself can be reset — TODO confirm against the implementation)
+ */
+API_PUBLIC void freeModel(Model_t *model);
+
+/**
+ * @brief Serialize a model into a caller-provided buffer.
+ * @param model model to serialize
+ * @param buffer caller-allocated destination; pass nullptr to only query the required size
+ * @param size in: capacity of buffer / out: required size when buffer is nullptr,
+ *             0 when the call fails; must not be nullptr
+ * @return true on success, false on failure (size or model is nullptr, or the buffer is
+ *         too small)
+ */
+API_PUBLIC bool serialize(Model_t model, unsigned char *buffer, int *size);
+
+/**
+ * @brief Rebuild a model from bytes produced by serialize.
+ * @param buffer serialized model bytes
+ * @param size size of buffer in bytes
+ * @return newly allocated model, or nullptr when buffer is nullptr or size < 1
+ *         (presumably released with freeModel — TODO confirm)
+ */
+API_PUBLIC Model_t deserialize(unsigned char *buffer, int size);
+
+#endif // GRAY_MATCH_H
--- a/img/3.bmp
+++ b/img/3.bmp
--- a/img/h.bmp
+++ b/img/h.bmp
--- a/img/i.bmp
+++ b/img/i.bmp
--- a/img/j.bmp
+++ b/img/j.bmp
--- a/img/k.bmp
+++ b/img/k.bmp
--- a/img/l.bmp
+++ b/img/l.bmp
--- a/img/model3.png
+++ b/img/model3.png
--- a/img/model3_src2.png
+++ b/img/model3_src2.png
--- a/img/result.png
+++ b/img/result.png
--- a/integral.cpp
+++ b/integral.cpp
@ -0,0 +1,180 @@
+#include "integral.h"
+#include "privateType.h"
+
+#include <opencv2/core/hal/intrin.hpp>
+
+// Convert the 32-bit integer lanes of src to double precision: v_cvt_f64 fills `low`,
+// v_cvt_f64_high fills `high`.
+inline void expand(const cv::v_int32 &src, cv::v_float64 &low, cv::v_float64 &high) {
+    high = cv::v_cvt_f64_high(src);
+    low  = cv::v_cvt_f64(src);
+}
+
+// Turn one register of 16-bit values into integral-image entries.
+// The lanes are prefix-summed in-register, offset by the running row total `pre`,
+// widened to double, added to the matching entries of the row above (prevDst) and
+// stored to dst. On return `pre` holds the last prefix value broadcast to every lane.
+// NOTE(review): the rotate steps 1/2/4 complete a prefix sum only when cv::v_uint16 has
+// 8 lanes (128-bit registers) — confirm wider SIMD is never enabled for this file.
+inline void integralSum(const cv::v_uint16 &src, double *dst, const double *prevDst,
+                        cv::v_uint32 &pre) {
+    // in-register prefix sum: add the register shifted by 1, 2 and 4 lanes
+    auto sum = cv::v_add(src, cv::v_rotate_left<1>(src));
+    sum      = cv::v_add(sum, cv::v_rotate_left<2>(sum));
+    sum      = cv::v_add(sum, cv::v_rotate_left<4>(sum));
+
+    // widen to 32 bit and add the total carried over from the previous registers
+    cv::v_uint32 v1;
+    cv::v_uint32 v2;
+    cv::v_expand(sum, v1, v2);
+    v1  = cv::v_add(v1, pre);
+    v2  = cv::v_add(v2, pre);
+    // last lane of the upper half is the running total for the next register
+    pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
+
+    // convert through signed 32 bit to double and add the row above, 4 stores in total
+    cv::v_float64 v3;
+    cv::v_float64 v4;
+    expand(cv::v_reinterpret_as_s32(v1), v3, v4);
+    cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
+    cv::v_store(dst + simdSize(cv::v_float64),
+                cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
+
+    expand(cv::v_reinterpret_as_s32(v2), v3, v4);
+    cv::v_store(dst + simdSize(cv::v_float64) * 2,
+                cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
+    cv::v_store(dst + simdSize(cv::v_float64) * 3,
+                cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
+}
+
+// Same scan as the 16-bit integralSum, but the additions are done after widening to
+// 32 bit. `src` is expected to already hold the squared values (the two-register overload
+// squares them before calling). Results are added to prevDst and stored to dst; `pre`
+// carries the running row total in and out.
+// NOTE(review): the lane shuffles below assume 4 lanes per cv::v_uint32 — confirm wider
+// SIMD is never enabled for this file.
+inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
+    cv::v_uint32 v1;
+    cv::v_uint32 v2;
+    cv::v_expand(src, v1, v2);
+
+    {
+        // add the register shifted by one lane (shifted in 16 bit, summed in 32 bit)
+        auto         shift1 = cv::v_rotate_left<1>(src);
+        cv::v_uint32 v3;
+        cv::v_uint32 v4;
+        cv::v_expand(shift1, v3, v4);
+
+        v1 = cv::v_add(v1, v3);
+        v2 = cv::v_add(v2, v4);
+
+        // shift-by-two step for the upper half, pulling lanes across the v1/v2 boundary
+        v4 = cv::v_extract<2>(v1, v2);
+        v2 = cv::v_add(v2, v4);
+
+        // shift-by-two step for the lower half
+        v3 = cv::v_rotate_left<2>(v1);
+        v1 = cv::v_add(v1, v3);
+
+        // add the carried total, then fold the lower half into the upper half
+        v1 = cv::v_add(v1, pre);
+        v2 = cv::v_add(v2, v1);
+
+        // last lane of the upper half is the running total for the next register
+        pre = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(v2));
+    }
+
+    // convert through signed 32 bit to double and add the row above, 4 stores in total
+    cv::v_float64 v3;
+    cv::v_float64 v4;
+    expand(cv::v_reinterpret_as_s32(v1), v3, v4);
+    cv::v_store(dst, cv::v_add(v3, cv::v_load(prevDst)));
+    cv::v_store(dst + simdSize(cv::v_float64),
+                cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64))));
+
+    expand(cv::v_reinterpret_as_s32(v2), v3, v4);
+    cv::v_store(dst + simdSize(cv::v_float64) * 2,
+                cv::v_add(v3, cv::v_load(prevDst + simdSize(cv::v_float64) * 2)));
+    cv::v_store(dst + simdSize(cv::v_float64) * 3,
+                cv::v_add(v4, cv::v_load(prevDst + simdSize(cv::v_float64) * 3)));
+}
+
+/*
+inline void integralSqSum(cv::v_uint32 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
+    src += cv::v_rotate_left<1>(src);
+    src += cv::v_rotate_left<2>(src);
+    src += pre;
+    pre  = cv::v_setall_u32(cv::v_extract_n<simdSize(cv::v_uint32) - 1>(src));
+
+    cv::v_float64 v1;
+    cv::v_float64 v2;
+    expand(cv::v_reinterpret_as_s32(src), v1, v2);
+
+    cv::v_store(dst, v1 + cv::v_load(prevDst));
+    cv::v_store(dst + simdSize(cv::v_float64), v2 + cv::v_load(prevDst +
+simdSize(cv::v_float64)));
+}
+
+inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
+    cv::v_uint32 v1;
+    cv::v_uint32 v2;
+    cv::v_expand(src, v1, v2);
+    integralSqSum(v1, dst, prevDst, pre);
+    integralSqSum(v2, dst + simdSize(cv::v_uint32), prevDst + simdSize(cv::v_uint32),
+pre);
+}
+*/
+
+// Process two consecutive 16-bit registers: v1 fills the first simdSize(cv::v_uint16)
+// outputs, v2 the next ones; `pre` carries the running total from one to the other.
+inline void integralSum(const cv::v_uint16 &v1, const cv::v_uint16 &v2, double *dst,
+                        const double *prevDst, cv::v_uint32 &pre) {
+    constexpr int half = simdSize(cv::v_uint16);
+
+    integralSum(v1, dst, prevDst, pre);
+    integralSum(v2, dst + half, prevDst + half, pre);
+}
+
+// Square both registers in place (v_mul_wrap) and run the squared-sum scan over them:
+// v1 fills the first simdSize(cv::v_uint16) outputs, v2 the next ones; `pre` carries the
+// running total from one to the other.
+inline void integralSqSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
+                          cv::v_uint32 &pre) {
+    constexpr int half = simdSize(cv::v_uint16);
+
+    v1 = cv::v_mul_wrap(v1, v1);
+    integralSqSum(v1, dst, prevDst, pre);
+
+    v2 = cv::v_mul_wrap(v2, v2);
+    integralSqSum(v2, dst + half, prevDst + half, pre);
+}
+
+// Compute the integral image (sum) and squared integral image (sqSum) of src.
+// Both outputs are created as CV_64FC1 with one extra row and column; the first row and
+// the first column are zero. Full SIMD-width chunks of every row are handled by the
+// vector helpers, the remaining columns by the scalar loop at the end.
+// NOTE(review): src is read as one byte per pixel through src.data — presumably it must
+// be CV_8UC1; nothing here checks it, confirm against the callers.
+void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum) {
+    const auto size = src.size() + cv::Size(1, 1);
+    sum.create(size, CV_64FC1);
+    sqSum.create(size, CV_64FC1);
+    // zero the first output row (step[0] is one row in bytes)
+    memset(sum.data, 0, sum.step[ 0 ]);
+    memset(sqSum.data, 0, sqSum.step[ 0 ]);
+
+    const auto *srcStart   = src.data;
+    const auto  srcStep    = src.step[ 0 ];
+    // output pointers start at row 1, column 1; steps are counted in doubles
+    auto       *sumStart   = reinterpret_cast<double *>(sum.data) + sum.step1() + 1;
+    const auto  sumStep    = sum.step[ 0 ] / sum.step[ 1 ];
+    auto       *sqSumStart = reinterpret_cast<double *>(sqSum.data) + sqSum.step1() + 1;
+    const auto  sqSumStep  = sqSum.step[ 0 ] / sqSum.step[ 1 ];
+    // size.width is src.cols + 1, so x < end only admits chunks that fit fully in the row
+    const auto  end        = size.width - simdSize(cv::v_uint8);
+    // pass 1: vectorized sum
+    for (int y = 0; y < src.rows; y++) {
+        auto       *srcPtr    = srcStart + srcStep * y;
+        auto       *sumPtr    = sumStart + sumStep * y;
+        const auto *preSumPtr = sumStart + sumStep * (y - 1);
+        // first column of this output row
+        sumPtr[ -1 ]          = 0;
+
+        cv::v_uint32 prevSum = cv::vx_setzero_u32();
+        for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
+            cv::v_uint16 v1;
+            cv::v_uint16 v2;
+            cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
+
+            integralSum(v1, v2, sumPtr + x, preSumPtr + x, prevSum);
+        }
+    }
+
+    // pass 2: vectorized squared sum
+    for (int y = 0; y < src.rows; y++) {
+        auto *srcPtr      = srcStart + srcStep * y;
+        auto *sqSumPtr    = sqSumStart + sqSumStep * y;
+        auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
+        // first column of this output row
+        sqSumPtr[ -1 ]    = 0;
+
+        cv::v_uint32 prevSqSum = cv::vx_setzero_u32();
+        for (int x = 0; x < end; x += simdSize(cv::v_uint8)) {
+            cv::v_uint16 v1;
+            cv::v_uint16 v2;
+            cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
+
+            integralSqSum(v1, v2, sqSumPtr + x, preSqSumPtr + x, prevSqSum);
+        }
+    }
+
+    // pass 3: scalar tail for the columns left over after the last full SIMD chunk,
+    // using the usual integral recurrence (left + value + above - above-left)
+    const auto start = src.cols - src.cols % simdSize(cv::v_uint8);
+    for (int y = 0; y < src.rows; y++) {
+        auto       *srcPtr      = srcStart + srcStep * y;
+        auto       *sumPtr      = sumStart + sumStep * y;
+        auto       *sqSumPtr    = sqSumStart + sqSumStep * y;
+        const auto *preSumPtr   = sumStart + sumStep * (y - 1);
+        const auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);
+        for (int x = start; x < src.cols; x++) {
+            const auto val   = srcPtr[ x ];
+            const auto sqVal = val * val;
+
+            sumPtr[ x ]   = sumPtr[ x - 1 ] + val + preSumPtr[ x ] - preSumPtr[ x - 1 ];
+            sqSumPtr[ x ] = sqSumPtr[ x - 1 ] + sqVal + preSqSumPtr[ x ] - preSqSumPtr[ x - 1 ];
+        }
+    }
+}
--- a/integral.h
+++ b/integral.h
@ -0,0 +1,8 @@
+#ifndef INTEGRAL_H
+#define INTEGRAL_H
+
+#include <opencv2/opencv.hpp>
+
+// Compute the integral image (sum) and the squared integral image (sqSum) of src.
+// Both outputs are (re)created as CV_64FC1 with one more row and column than src.
+void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum);
+
+#endif // INTEGRAL_H
--- a/main.cpp
+++ b/main.cpp
@ -1,9 +1,9 @@
 #include "grayMatch.h"

+#include <fstream>
 #include <iostream>
 #include <opencv2/core/utility.hpp>
 #include <opencv2/opencv.hpp>
-#include <string>

 int main(int argc, const char *argv[]) {
    const std::string keys = "{model m || model image}"
@ -24,8 +24,8 @@ int main(int argc, const char *argv[]) {
        return 0;
    }

-    auto srcFile = std::string(IMG_DIR) + "/model3.png";
-    auto dstFile = std::string(IMG_DIR) + "/model3_src2.png";
+    auto srcFile = std::string(IMG_DIR) + "/3.bmp";
+    auto dstFile = std::string(IMG_DIR) + "/h.bmp";
    if (cmd.has("model"))
        srcFile = cmd.get<std::string>("model");
    if (cmd.has("scene"))
@ -37,19 +37,66 @@ int main(int argc, const char *argv[]) {
        return -1;
    }

+    const std::string modelName("model.bin");
+    {
+        auto t0    = cv::getTickCount();
+        auto model = trainModel(src.data, src.cols, src.rows, src.channels(),
+                                static_cast<int>(src.step), 0, 0, src.cols, src.rows, -1);
+        auto t1    = cv::getTickCount();
+
+        // get size
+        int size;
+        serialize(model, nullptr, &size);
+
+        // serialize to buffer
+        std::vector<uchar> buffer(size);
+        serialize(model, buffer.data(), &size);
+
+        // write to file
+        std::ofstream ofs(modelName, std::ios::binary | std::ios::out);
+        if (!ofs.is_open()) {
+            return -1;
+        }
+        ofs.write(reinterpret_cast<const char *>(buffer.data()), size);
+
+        freeModel(&model);
+
+        auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
+        std::cout << "train(s):" << trainCost << std::endl;
+    }
+
    int               count = 70;
    std::vector<Pose> poses(count);
+    Model_t           model;
    auto              score = cmd.get<float>("threshold");
+    {
+        // open file
+        std::ifstream ifs(modelName, std::ios::binary | std::ios::in);
+        if (!ifs.is_open()) {
+            return -2;
+        }

-    auto t0    = cv::getTickCount();
-    auto model = trainModel(src, -1, 0, 360, -1);
-    auto t1    = cv::getTickCount();
-    matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
-    auto t2 = cv::getTickCount();
+        // get size
+        ifs.seekg(0, std::ios::end);
+        auto size = ifs.tellg();
+        ifs.seekg(0, std::ios::beg);
+
+        // read to buffer
+        std::vector<uchar> buffer(size);
+        ifs.read(reinterpret_cast<char *>(buffer.data()), size);
+
+        // deserialize from buffer
+        model = deserialize(buffer.data(), static_cast<int>(buffer.size()));
+
+        auto t2 = cv::getTickCount();
+        matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0, 0,
+                   dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
+        auto t3 = cv::getTickCount();
+
+        auto matchCost = static_cast<double>(t3 - t2) / cv::getTickFrequency();
+        std::cout << "match(s):" << matchCost << std::endl;
+    }

-    const auto trainCost = static_cast<double>(t1 - t0) / cv::getTickFrequency();
-    const auto matchCost = static_cast<double>(t2 - t1) / cv::getTickFrequency();
-    std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
    for (int i = 0; i < count; i++) {
        const auto &pose = poses[ i ];
        std::cout << pose.x << "," << pose.y << "," << pose.angle << "," << pose.score << std::endl;
@ -60,7 +107,8 @@ int main(int argc, const char *argv[]) {

        auto start = cv::getTickCount();
        for (int i = 0; i < times; i++) {
-            matchModel(dst, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
+            matchModel(dst.data, dst.cols, dst.rows, dst.channels(), static_cast<int>(dst.step), 0,
+                       0, dst.cols, dst.rows, model, &count, poses.data(), -1, 0, 360, 0, score, 1);
            count = 70;
        }
        auto end = cv::getTickCount();
--- a/privateType.h
+++ b/privateType.h
@ -2,49 +2,40 @@

 #include <opencv2/core.hpp>

-struct HRLE {
-    int row         = -1;
-    int startColumn = -1;
-    int length      = 0;
-};
-
-struct VRLE {
-    int col      = -1;
-    int startRow = -1;
-    int length   = 0;
-};
-
-using HRegion = std::vector<HRLE>;
-using VRegion = std::vector<VRLE>;
-
-struct Template {
-    cv::Mat         img;
-    HRegion         hRegion;
-    VRegion         vRegion;
-    cv::RotatedRect rect;
-
-    double mean    = 0;
-    double normal  = 0;
-    double invArea = 0;
-};
-
-struct Layer {
-    double angleStep = 0;
-
-    std::vector<Template> templates;
-};
-
 struct Model {
-    double startAngle = 0;
-    double stopAngle  = 0;
-    double angleStep  = 0;
+    // Trained model data: a list of images plus several value lists that resize() and
+    // reserve() keep at a common length (presumably one entry per pyramid level — TODO confirm).
+    std::vector<cv::Mat>    pyramids;
+    std::vector<cv::Scalar> mean;
+    std::vector<double>     normal;
+    std::vector<double>     invArea;
+    std::vector<uchar>      equal1;
+    uchar                   borderColor = 0;

-    cv::Size           srcSize;
-    std::vector<Layer> layers;
+    // Empty all five vectors; borderColor is left untouched.
+    void clear() {
+        pyramids.clear();
+        normal.clear();
+        invArea.clear();
+        mean.clear();
+        equal1.clear();
+    }
+
+    // Resize the four value vectors to `size`; pyramids is not resized here.
+    void resize(const std::size_t size) {
+        normal.resize(size);
+        invArea.resize(size);
+        mean.resize(size);
+        equal1.resize(size);
+    }
+
+    // Reserve capacity for `size` entries in all five vectors.
+    void reserve(const std::size_t size) {
+        pyramids.reserve(size);
+        normal.reserve(size);
+        invArea.reserve(size);
+        mean.reserve(size);
+        equal1.reserve(size);
+    }
 };

 #if CV_VERSION_MAJOR >= 4 && CV_VERSION_MINOR >= 8
 #define simdSize(type) cv::VTraits<type>::nlanes
 #else
 #define simdSize(type) type::nlanes
-#endif
+#endif
--- a/serialize.cpp
+++ b/serialize.cpp
@ -0,0 +1,228 @@
+#include "grayMatch.h"
+#include "privateType.h"
+
+#include <opencv2/core/hal/intrin.hpp>
+
+// Abstract archive over a raw byte buffer. Derived classes implement operator& for each
+// field type; the same traversal of Model is then used for whatever the derived class
+// does with the bytes. m_size is the number of bytes processed so far and doubles as the
+// current offset into m_data.
+class Buffer {
+public:
+    Buffer(const int size_, unsigned char *data_)
+        : m_size(size_)
+        , m_data(data_) {}
+
+    virtual ~Buffer() = default;
+
+    virtual void operator&(uchar &val)                   = 0;
+    virtual void operator&(std::vector<cv::Mat> &val)    = 0;
+    virtual void operator&(std::vector<cv::Scalar> &val) = 0;
+    virtual void operator&(std::vector<double> &val)     = 0;
+    virtual void operator&(std::vector<uchar> &val)      = 0;
+
+    // Visit every Model field in a fixed order; this order defines the binary layout.
+    void operator&(Model &val) {
+        this->operator&(val.pyramids);
+        this->operator&(val.mean);
+        this->operator&(val.normal);
+        this->operator&(val.invArea);
+        this->operator&(val.equal1);
+        this->operator&(val.borderColor);
+    }
+
+    // Number of bytes processed so far.
+    [[nodiscard]] int count() const {
+        return m_size;
+    }
+
+protected:
+    int            m_size = 0;
+    unsigned char *m_data = nullptr;
+};
+
+// Copy `size` bytes from src to dst.
+void binWrite(void *const dst, const void *src, const int size) {
+    const auto bytes = static_cast<std::size_t>(size);
+    memcpy(dst, src, bytes);
+}
+
+// Writer with the same signature that discards its arguments and touches no memory.
+void fakeWrite(void *const /*dst*/, const void * /*src*/, const int /*size*/) {}
+
+using Write = void (*)(void *, const void *, int);
+
+// Output side of Buffer: every operator& hands the value's bytes to `write` at the
+// current offset and advances the offset by the same amount. Vectors are emitted as an
+// int element count followed by the elements; an image is emitted as cols, rows and then
+// `cols` bytes of every row.
+template <Write write> class OutBuffer final : public Buffer {
+public:
+    explicit OutBuffer(unsigned char *const data_)
+        : Buffer(0, data_) {}
+
+    void operator&(uchar &val) override {
+        put(&val, static_cast<int>(sizeof(val)));
+    }
+    void operator&(std::vector<cv::Mat> &val) override {
+        putCount(val.size());
+        for (auto &image : val) {
+            writeElement(image);
+        }
+    }
+    void writeElement(cv::Mat &val) {
+        put(&val.cols, static_cast<int>(sizeof(int)));
+        put(&val.rows, static_cast<int>(sizeof(int)));
+
+        for (int row = 0; row < val.rows; row++) {
+            put(val.ptr<unsigned char>(row), val.cols);
+        }
+    }
+    void operator&(std::vector<cv::Scalar> &val) override {
+        putCount(val.size());
+        for (auto &scalar : val) {
+            writeElement(scalar);
+        }
+    }
+    void writeElement(const cv::Scalar &val) {
+        put(val.val, static_cast<int>(sizeof(double)) * 4);
+    }
+    void operator&(std::vector<double> &val) override {
+        const int count = putCount(val.size());
+        put(val.data(), static_cast<int>(sizeof(double)) * count);
+    }
+    void operator&(std::vector<uchar> &val) override {
+        const int count = putCount(val.size());
+        put(val.data(), static_cast<int>(sizeof(uchar)) * count);
+    }
+
+private:
+    // Emit `bytes` bytes from src at the current offset and advance the offset.
+    void put(const void *src, const int bytes) {
+        write(m_data + m_size, src, bytes);
+        m_size += bytes;
+    }
+
+    // Emit a container length as int and return it.
+    int putCount(const std::size_t elements) {
+        const int count = static_cast<int>(elements);
+        put(&count, static_cast<int>(sizeof(count)));
+        return count;
+    }
+};
+
+using SizeCountBuffer = OutBuffer<fakeWrite>;
+using WriteBuffer     = OutBuffer<binWrite>;
+
+// Input side of Buffer: every operator& copies the next bytes of the buffer into the
+// value and advances the offset. All reads are checked against the buffer capacity; the
+// first read that would run past the end (or meets a negative / impossible length) marks
+// the reader as failed, after which every further read is a no-op.
+class ReadBuffer final : public Buffer {
+public:
+    // data_: start of the serialized bytes, capacity_: number of readable bytes
+    ReadBuffer(unsigned char *data_, const int capacity_)
+        : Buffer(0, data_)
+        , m_capacity(capacity_) {}
+
+    // false once any read has failed
+    [[nodiscard]] bool good() const {
+        return m_good;
+    }
+
+    void operator&(uchar &val) override {
+        take(&val, static_cast<int>(sizeof(uchar)));
+    }
+    void operator&(std::vector<cv::Mat> &val) override {
+        // every stored image occupies at least its width and height fields
+        const int count = takeCount(2 * static_cast<int>(sizeof(int)));
+        val.resize(static_cast<std::size_t>(count));
+
+        for (auto &element : val) {
+            read(element);
+        }
+    }
+    void read(cv::Mat &val) {
+        int width  = 0;
+        int height = 0;
+        if (!take(&width, static_cast<int>(sizeof(int))) ||
+            !take(&height, static_cast<int>(sizeof(int)))) {
+            return;
+        }
+
+        // reject dimensions the remaining bytes cannot possibly back
+        if (width < 0 || height < 0 || static_cast<long long>(width) * height > remaining()) {
+            m_good = false;
+            return;
+        }
+
+        // rows are padded to a multiple of the SIMD width; the model keeps a view on the
+        // stored width only
+        const int  alignedWidth = static_cast<int>(cv::alignSize(width, simdSize(cv::v_uint8)));
+        const auto img          = cv::Mat::zeros(height, alignedWidth, CV_8UC1);
+        val                     = img(cv::Rect(0, 0, width, height));
+
+        for (int y = 0; y < height; y++) {
+            take(val.ptr<uchar>(y), width);
+        }
+    }
+    void operator&(std::vector<cv::Scalar> &val) override {
+        const int count = takeCount(static_cast<int>(sizeof(double)) * 4);
+        val.resize(static_cast<std::size_t>(count));
+
+        for (auto &element : val) {
+            read(element);
+        }
+    }
+    void read(cv::Scalar &val) {
+        take(val.val, static_cast<int>(sizeof(double)) * 4);
+    }
+    void operator&(std::vector<double> &val) override {
+        const int count = takeCount(static_cast<int>(sizeof(double)));
+        val.resize(static_cast<std::size_t>(count));
+        take(val.data(), static_cast<int>(sizeof(double)) * count);
+    }
+    void operator&(std::vector<uchar> &val) override {
+        const int count = takeCount(static_cast<int>(sizeof(uchar)));
+        val.resize(static_cast<std::size_t>(count));
+        take(val.data(), static_cast<int>(sizeof(uchar)) * count);
+    }
+
+private:
+    // bytes left between the current offset and the end of the buffer
+    [[nodiscard]] int remaining() const {
+        return m_capacity - m_size;
+    }
+
+    // Copy `bytes` bytes to dst and advance; marks the reader failed when they are not
+    // available. Zero-byte reads succeed without touching dst.
+    bool take(void *dst, const int bytes) {
+        if (!m_good || bytes < 0 || bytes > remaining()) {
+            m_good = false;
+            return false;
+        }
+        if (bytes > 0) {
+            memcpy(dst, m_data + m_size, static_cast<std::size_t>(bytes));
+            m_size += bytes;
+        }
+        return true;
+    }
+
+    // Read a container length and validate it against the bytes that are left, given the
+    // minimum size of one element. Returns 0 when the length is missing or invalid.
+    int takeCount(const int elementBytes) {
+        int count = 0;
+        if (!take(&count, static_cast<int>(sizeof(int)))) {
+            return 0;
+        }
+        if (count < 0 || static_cast<long long>(count) * elementBytes > remaining()) {
+            m_good = false;
+            return 0;
+        }
+        return count;
+    }
+
+    int  m_capacity = 0;
+    bool m_good     = true;
+};
+
+// Run the archive over every field of the model.
+void operation(Buffer *buf, Model &model) {
+    *buf &model;
+}
+
+// Serialize model into buffer.
+// - size == nullptr: returns false.
+// - model == nullptr: *size = 0, returns false.
+// - buffer == nullptr: *size = required byte count, returns true.
+// - *size smaller than required: *size = 0, returns false.
+// - otherwise: writes the model, sets *size to the number of bytes written, returns true.
+bool serialize(Model *const model, unsigned char *buffer, int *size) {
+    if (nullptr == size) {
+        return false;
+    }
+
+    if (nullptr == model) {
+        *size = 0;
+        return false;
+    }
+
+    // dry run that only counts bytes
+    SizeCountBuffer counter(buffer);
+    operation(&counter, *model);
+
+    if (nullptr == buffer) {
+        *size = counter.count();
+        return true;
+    }
+
+    if (counter.count() > *size) {
+        *size = 0;
+        return false;
+    }
+
+    WriteBuffer writer(buffer);
+    operation(&writer, *model);
+    // report the written size as documented for the out parameter
+    *size = writer.count();
+    return true;
+}
+
+// Rebuild a model from `size` bytes at buffer. Returns nullptr when the arguments are
+// invalid, when the data is truncated or inconsistent, or when reading throws; otherwise
+// returns a newly allocated Model owned by the caller.
+Model_t deserialize(unsigned char *buffer, const int size) {
+    if (size < 1 || nullptr == buffer) {
+        return nullptr;
+    }
+
+    ReadBuffer reader(buffer, size);
+    auto      *model = new Model;
+    try {
+        operation(&reader, *model);
+    } catch (...) {
+        // allocation or OpenCV failure: do not leak and do not let it cross the API
+        delete model;
+        return nullptr;
+    }
+
+    if (!reader.good()) {
+        delete model;
+        return nullptr;
+    }
+
+    return model;
+}
--- a/sum.cpp
+++ b/sum.cpp
@ -1,240 +0,0 @@
-#include "sum.h"
-
-#include <opencv2/core/hal/intrin.hpp>
-
-inline cv::v_uint32x4 v_add_expand(const cv::v_uint16x8 &src) { // folds 8 u16 lanes into 4 u32 lanes: result[i] = src[i] + src[i + 4]
-    cv::v_uint32x4 low;
-    cv::v_uint32x4 high;
-    cv::v_expand(src, low, high); // widen to 32 bit: low <- lanes 0-3, high <- lanes 4-7
-
-    return cv::v_add(low, high);
-}
-
-inline cv::v_uint64x2 v_add_expand(const cv::v_uint32x4 &src) { // folds 4 u32 lanes into 2 u64 lanes: result[i] = src[i] + src[i + 2]
-    cv::v_uint64x2 low;
-    cv::v_uint64x2 high;
-    cv::v_expand(src, low, high); // widen to 64 bit: low <- lanes 0-1, high <- lanes 2-3
-
-    return cv::v_add(low, high);
-}
-
-inline void computeSum(const cv::v_uint8x16 &src, cv::v_uint32x4 &sum, cv::v_uint64x2 &sqSum) { // accumulates the 16 bytes of src into sum and their squares into sqSum
-    cv::v_uint16x8 low;
-    cv::v_uint16x8 high;
-    cv::v_expand(src, low, high);
-
-    sum            = cv::v_add(sum, v_add_expand(cv::v_add(low, high))); // 16 -> 8 -> 4 lanes; low + high is at most 255 + 255, so the 16-bit add cannot saturate
-    const auto dot = cv::v_dotprod_expand_fast(src, src); // products of src with itself, summed in groups into 32-bit lanes
-    sqSum          = cv::v_add(sqSum, v_add_expand(dot)); // 4 -> 2 lanes, widened to 64 bit
-}
-
-void computeSum(const cv::Mat &src, const HRegion &hRegion, uint64 &sum, uint64 &sqSum) { // outputs the total of the src bytes covered by the runs of hRegion (sum) and the total of their squares (sqSum)
-    constexpr auto blockSize = simdSize(cv::v_uint8); // simdSize is defined outside this file; presumably the number of 8-bit lanes per vector -- confirm
-    const auto    *srcPtr    = src.data;
-    cv::v_uint32x4 vSum      = cv::v_setzero_u32();
-    cv::v_uint64x2 vSqSum    = cv::v_setzero_u64();
-    uint32_t       partSum   = 0;
-    uint64         partSqSum = 0;
-
-    for (const auto &rle : hRegion) {
-        const auto *ptr = srcPtr + src.step * rle.row + rle.startColumn; // first byte of this run
-
-        int i = 0;
-        for (; i < rle.length - blockSize; i += blockSize) { // vector part; the strict '<' leaves the last block, even a full one, to the scalar loop
-            computeSum(cv::v_load(ptr + i), vSum, vSqSum);
-        }
-
-        // TODO aligned fill 0
-        for (; i < rle.length; i++) { // scalar tail of the run
-            const auto val  = ptr[ i ];
-            partSum        += val;
-            partSqSum      += static_cast<ushort>(val) * static_cast<ushort>(val);
-        }
-    }
-
-    sum   = cv::v_reduce_sum(vSum) + partSum; // vector lanes plus scalar remainder
-    sqSum = cv::v_reduce_sum(vSqSum) + partSqSum;
-}
-
-inline void computeSumDiff(const cv::v_uint16x8 &start, const cv::v_uint16x8 &end,
-                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1) { // adds (end - start) per lane to the accumulators: lanes 0-3 into diff0, lanes 4-7 into diff1
-    cv::v_int16x8 sub;
-    {
-        const auto vStart = cv::v_reinterpret_as_s16(start); // switch to signed lanes so the difference can be negative
-        const auto vEnd   = cv::v_reinterpret_as_s16(end);
-        sub               = cv::v_sub(vEnd, vStart);
-    }
-
-    cv::v_int32x4 val = cv::v_expand_low(sub);
-    diff0             = cv::v_add(diff0, val);
-
-    val   = cv::v_expand_high(sub);
-    diff1 = cv::v_add(diff1, val);
-}
-
-inline void computeSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
-                           cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
-                           cv::v_int32x4 &diff3) { // 16-lane version: (end - start) of lanes 0-3 / 4-7 / 8-11 / 12-15 is added to diff0 / diff1 / diff2 / diff3
-    computeSumDiff(cv::v_expand_low(start), cv::v_expand_low(end), diff0, diff1); // lower 8 bytes, widened to 16 bit
-    computeSumDiff(cv::v_expand_high(start), cv::v_expand_high(end), diff2, diff3); // upper 8 bytes, widened to 16 bit
-}
-
-inline void computeSqSumDiff(const cv::v_uint32x4 &start, const cv::v_uint32x4 &end,
-                             cv::v_int32x4 &diff0) { // diff0 += end - start, computed in signed 32-bit lanes
-    const cv::v_int32x4 vStart = cv::v_reinterpret_as_s32(start);
-    const cv::v_int32x4 vEnd   = cv::v_reinterpret_as_s32(end);
-
-    const cv::v_int32x4 sub = cv::v_sub(vEnd, vStart);
-    diff0                   = cv::v_add(diff0, sub);
-}
-
-inline void computeSqSumDiff(cv::v_uint16x8 &start, cv::v_uint16x8 &end, cv::v_int32x4 &diff0,
-                             cv::v_int32x4 &diff1) { // adds end^2 - start^2 per lane: lanes 0-3 into diff0, lanes 4-7 into diff1; start and end are overwritten with their squares
-    start = cv::v_mul(start, start); // squared in place, still 16-bit lanes
-    end   = cv::v_mul(end, end);
-
-    computeSqSumDiff(cv::v_expand_low(start), cv::v_expand_low(end), diff0);
-    computeSqSumDiff(cv::v_expand_high(start), cv::v_expand_high(end), diff1);
-}
-
-inline void computeSqSumDiff(const cv::v_uint8x16 &start, const cv::v_uint8x16 &end,
-                             cv::v_int32x4 &diff0, cv::v_int32x4 &diff1, cv::v_int32x4 &diff2,
-                             cv::v_int32x4 &diff3) { // 16-lane version: end^2 - start^2 of lanes 0-3 / 4-7 / 8-11 / 12-15 is added to diff0 / diff1 / diff2 / diff3
-
-    auto vStart = cv::v_expand_low(start); // lower 8 bytes as 16-bit lanes; local copies because the callee squares them in place
-    auto vEnd   = cv::v_expand_low(end);
-    computeSqSumDiff(vStart, vEnd, diff0, diff1);
-
-    vStart = cv::v_expand_high(start); // upper 8 bytes
-    vEnd   = cv::v_expand_high(end);
-    computeSqSumDiff(vStart, vEnd, diff2, diff3);
-}
-
-inline void v_expand_store(double *ptr, const std::array<int, 4> &val) { // running sum over four cells: ptr[k] = ptr[k - 1] + val[k] for k = 0..3
-    ptr[ 0 ] = ptr[ -1 ] + val[ 0 ]; // reads the cell before ptr, so ptr must not point at the first element
-    ptr[ 1 ] = ptr[ 0 ] + val[ 1 ];
-    ptr[ 2 ] = ptr[ 1 ] + val[ 2 ];
-    ptr[ 3 ] = ptr[ 2 ] + val[ 3 ];
-}
-
-void shiftH(const uchar *src, std::size_t srcStep, const HRegion &hRegion, int row, double *sum,
-            std::size_t sumStep, int sumWidth, double *sqSum, std::size_t sqSumStep) { // fills columns 1..sumWidth-1 of row `row` in sum/sqSum: each cell is its left neighbour plus, for every run of hRegion, (byte entering on the right - byte leaving on the left); column 0 must already be set
-    constexpr auto blockSize = simdSize(cv::v_uint8); // simdSize is defined outside this file; presumably the number of 8-bit lanes per vector -- confirm
-    auto          *srcPtr    = src;
-    auto          *sumPtr    = sum + row * sumStep;
-    auto          *sqSumPtr  = sqSum + row * sqSumStep;
-
-    std::array<int, 4> buf{};
-
-    int i = 1;
-    for (; i < sumWidth - blockSize; i += blockSize) { // vector part: the body below handles 16 output columns per iteration
-        cv::v_int32x4 diff0 = cv::v_setzero_s32(); // diff0..diff3: per-column change of the sum for columns i..i+15
-        cv::v_int32x4 diff1 = cv::v_setzero_s32();
-        cv::v_int32x4 diff2 = cv::v_setzero_s32();
-        cv::v_int32x4 diff3 = cv::v_setzero_s32();
-
-        cv::v_int32x4 diff10 = cv::v_setzero_s32(); // diff10..diff13: per-column change of the squared sum
-        cv::v_int32x4 diff11 = cv::v_setzero_s32();
-        cv::v_int32x4 diff12 = cv::v_setzero_s32();
-        cv::v_int32x4 diff13 = cv::v_setzero_s32();
-
-        for (const auto &rle : hRegion) {
-            auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1; // first byte of the run as placed for column i - 1 (it leaves the window)
-            auto *endPtr   = startPtr + rle.length; // byte just past that run (it enters the window)
-
-            auto vStart = cv::v_load(startPtr);
-            auto vEnd   = cv::v_load(endPtr);
-            computeSumDiff(vStart, vEnd, diff0, diff1, diff2, diff3);
-            computeSqSumDiff(vStart, vEnd, diff10, diff11, diff12, diff13);
-        }
-
-        auto *sumPtrStart = sumPtr + i;
-        cv::v_store(buf.data(), diff0); // spill four lanes, then chain them onto the previous cell as a running sum
-        v_expand_store(sumPtrStart, buf);
-        cv::v_store(buf.data(), diff1);
-        v_expand_store(sumPtrStart + 4, buf);
-        cv::v_store(buf.data(), diff2);
-        v_expand_store(sumPtrStart + 8, buf);
-        cv::v_store(buf.data(), diff3);
-        v_expand_store(sumPtrStart + 12, buf);
-
-        auto *sqSumPtrStart = sqSumPtr + i;
-        cv::v_store(buf.data(), diff10);
-        v_expand_store(sqSumPtrStart, buf);
-        cv::v_store(buf.data(), diff11);
-        v_expand_store(sqSumPtrStart + 4, buf);
-        cv::v_store(buf.data(), diff12);
-        v_expand_store(sqSumPtrStart + 8, buf);
-        cv::v_store(buf.data(), diff13);
-        v_expand_store(sqSumPtrStart + 12, buf);
-    }
-
-    for (; i < sumWidth; i++) { // scalar tail: same update, one column at a time
-        int32_t partSum   = 0;
-        int32_t partSqSum = 0;
-        for (const auto &rle : hRegion) {
-            auto *startPtr = srcPtr + (row + rle.row) * srcStep + rle.startColumn + i - 1;
-            auto *endPtr   = startPtr + rle.length;
-
-            const int32_t start  = *startPtr;
-            const int32_t end    = *endPtr;
-            partSum             += end - start;
-            partSqSum           += end * end - start * start;
-        }
-
-        auto *sumPtrStart   = sumPtr + i;
-        sumPtrStart[ 0 ]    = sumPtrStart[ -1 ] + partSum;
-        auto *sqSumPtrStart = sqSumPtr + i;
-        sqSumPtrStart[ 0 ]  = sqSumPtrStart[ -1 ] + partSqSum;
-    }
-}
-
-void shiftV(const uchar *src, std::size_t srcStep, const VRegion &vRegion, int row, double *sum,
-            std::size_t sumStep, double *sqSum, std::size_t sqSumStep) { // sets column 0 of row `row` in sum/sqSum from the cell directly above plus, for every run of vRegion, (byte entering at the bottom - byte leaving at the top); reads row - 1, so row must be at least 1
-    auto *srcPtr   = src;
-    auto *sumPtr   = sum + row * sumStep;
-    auto *sqSumPtr = sqSum + row * sqSumStep;
-
-    int32_t partSum   = 0;
-    int32_t partSqSum = 0;
-    for (const auto &rle : vRegion) {
-        auto *startPtr = srcPtr + (row + rle.startRow - 1) * srcStep + rle.col; // top byte of the run as placed for row - 1
-        auto *endPtr   = startPtr + rle.length * srcStep; // byte rle.length rows further down in the same column
-
-        const int32_t start = *startPtr;
-        const int32_t end   = *endPtr;
-
-        partSum   += end - start;
-        partSqSum += end * end - start * start;
-    }
-
-    sumPtr[ 0 ]   = *(sumPtr - sumStep) + partSum;
-    sqSumPtr[ 0 ] = *(sqSumPtr - sqSumStep) + partSqSum;
-}
-
-void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
-                 const HRegion &hRegion, const VRegion &vRegion) { // creates sum and sqSum as CV_64FC1 of size (src size - templateSize + 1 per axis), computes cell (0, 0) directly and every other cell incrementally from a neighbour
-    const auto size = src.size() - templateSize + cv::Size(1, 1);
-    sum.create(size, CV_64FC1);
-    sqSum.create(size, CV_64FC1);
-
-    const auto *srcPtr    = src.data;
-    auto       *sumPtr    = reinterpret_cast<double *>(sum.data);
-    auto       *sqSumPtr  = reinterpret_cast<double *>(sqSum.data);
-    const auto  sumStep   = sum.step1(); // row stride counted in doubles, matching the double* arithmetic in shiftH/shiftV
-    const auto  sqSumStep = sqSum.step1();
-
-    // compute first
-    uint64 sum0;
-    uint64 sqSum0;
-    computeSum(src, hRegion, sum0, sqSum0);
-    sumPtr[ 0 ]   = static_cast<double>(sum0);
-    sqSumPtr[ 0 ] = static_cast<double>(sqSum0);
-
-    for (int y = 0; y < size.height; y++) {
-        shiftH(srcPtr, src.step, hRegion, y, sumPtr, sumStep, sum.cols, sqSumPtr, sqSumStep); // fill the rest of row y from its column 0
-        if (y + 1 < size.height) {
-            shiftV(srcPtr, src.step, vRegion, y + 1, sumPtr, sumStep, sqSumPtr, sqSumStep); // seed column 0 of the next row from column 0 of row y
-        }
-    }
-}
--- a/sum.h
+++ b/sum.h
@ -1,8 +0,0 @@
-#pragma once
-
-#include "privateType.h"
-
-#include <opencv2/opencv.hpp>
-
-void integralSum(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum, const cv::Size &templateSize,
-                 const HRegion &hRegion, const VRegion &vRegion); // outputs sum and sqSum; the definition in sum.cpp creates both as CV_64FC1 of size (src size - templateSize + 1 per axis)
Author	SHA1	Message	Date
y.qiu	64298e45eb	static check	2025-05-08 17:56:22 +08:00
y.qiu	93bef37422	fix: mismatch faction sign	2025-03-27 13:43:37 +08:00
y.qiu	58cdd2facf	add cmd	2025-03-25 17:56:02 +08:00
y.qiu	a756fc1bf2	unroll	2025-03-10 09:10:32 +08:00
y.qiu	0f2391b255	omp optim	2025-03-02 12:47:11 +08:00
y.qiu	8448b07e17	update readme	2025-02-27 21:04:58 +08:00
y.qiu	b2652072ef	misc	2025-02-26 17:35:52 +08:00
y.qiu	972dcf7cb3	small fix	2025-02-22 17:21:42 +08:00
y.qiu	d0b8a6f3a2	refactor intrinsics api	2025-02-10 21:33:38 +08:00
y.qiu	80e63e7637	set valid opencv version	2025-01-21 21:02:02 +08:00
y.qiu	8a975e19c9	add todo	2025-01-21 20:48:01 +08:00
y.qiu	45971ed070	update readme	2024-12-27 22:05:26 +08:00
y.qiu	094ada9337	misc	2024-12-23 09:05:27 +08:00
y.qiu	d07f070668	add pos subpixel	2024-12-22 21:01:38 +08:00
y.qiu	a3b1b64e9a	fix: image size	2024-12-01 17:57:15 +08:00
y.qiu	d1e4abfc71	misc	2024-11-03 21:12:39 +08:00
y.qiu	cff37c2f08	fix: invalid size write	2024-11-03 20:51:33 +08:00
y.qiu	440c938b22	last miss fix	2024-10-08 21:08:15 +08:00
y.qiu	b6e2d9aba5	unified simd size	2024-10-08 20:39:30 +08:00
y.qiu	4a2f97e354	fix arm64	2024-09-27 21:11:19 +08:00
y.qiu	4b8ec4359c	optimal cache miss	2024-09-22 17:20:24 +08:00
y.qiu	d451b46535	misc	2024-09-21 09:16:15 +08:00
y.qiu	09017f0bc0	let integral as standalone	2024-09-20 22:50:25 +08:00
y.qiu	eea1b6320b	spilt vector and scalar code	2024-09-20 22:42:46 +08:00
y.qiu	b4cdaff8d1	misc	2024-09-20 17:55:29 +08:00
y.qiu	6109ff388a	update README.md	2024-09-19 22:47:25 +08:00
y.qiu	98acd6efbd	misc	2024-09-19 15:50:57 +08:00
y.qiu	92b09a51c7	misc	2024-09-19 11:28:20 +08:00
y.qiu	7f5f665af2	misc	2024-09-19 11:20:33 +08:00
y.qiu	44cf2a0526	fix integral	2024-09-19 10:32:26 +08:00
y.qiu	df8267b60d	simd integral image	2024-09-18 22:31:51 +08:00
y.qiu	01a20a18f1	static check	2024-09-18 19:53:27 +08:00
y.qiu	087ab9bc17	misc	2024-09-18 18:10:35 +08:00
y.qiu	d7bf2ad384	no copy	2024-09-18 15:45:03 +08:00
y.qiu	2c20dc2006	add openmp support	2024-09-18 11:21:33 +08:00
y.qiu	57ae2c8704	update doc	2024-09-13 20:54:55 +08:00
y.qiu	8e7227f74b	demo: match start top level	2024-09-13 20:39:50 +08:00
SurfaceMan	c72b709178	add LICENSE	2024-09-13 20:28:04 +08:00
y.qiu	b5523f8491	replace call cv::Mat::ptr	2024-09-13 17:55:58 +08:00
y.qiu	f43f78f45f	add readme	2024-09-12 21:07:31 +08:00
y.qiu	15372e0b60	add sample image	2024-09-12 21:06:50 +08:00
y.qiu	3d315445ab	remove unused	2024-09-12 19:58:50 +08:00
y.qiu	d03818a63c	special optim for arm	2024-09-12 08:36:55 +08:00
y.qiu	ed14c0e6f4	product hand write	2024-09-11 22:15:47 +08:00
y.qiu	fa20b52b85	serialize to file	2024-09-11 17:53:31 +08:00
y.qiu	7a88dad291	misc	2024-09-11 17:52:38 +08:00
y.qiu	dd5de6f6f1	misc	2024-09-11 14:30:31 +08:00
y.qiu	8050b8612a	🎈 perf(match): less reduce sum	2024-09-11 10:21:56 +08:00
y.qiu	3a689e900f	revert	2024-09-06 21:19:58 +08:00
y.qiu	b8918b123d	static check	2024-09-06 14:35:08 +08:00
y.qiu	1c67371cc9	fix type	2024-09-05 13:57:41 +08:00
y.qiu	422c12d691	impl shift	2024-09-05 11:21:55 +08:00
y.qiu	bece4f138f	misc	2024-09-04 18:14:09 +08:00
y.qiu	75098c811c	remove unused	2024-09-04 14:57:09 +08:00
y.qiu	a52deadc30	misc	2024-09-04 09:15:30 +08:00
y.qiu	ef1556c538	build config	2024-09-04 09:12:58 +08:00
y.qiu	92160ee1d9	misc	2024-09-04 09:06:42 +08:00
y.qiu	29fb7b001c	static check	2024-09-03 22:53:25 +08:00
y.qiu	d85d4f7443	misc	2024-09-03 18:05:13 +08:00
y.qiu	13cbf06f46	aligned model at training	2024-09-03 17:03:42 +08:00
y.qiu	a5c8049af1	aligned model at deserialization	2024-09-03 15:27:48 +08:00
y.qiu	371b926fb0	cleanr name	2024-09-02 17:57:10 +08:00
y.qiu	3409395778	misc	2024-09-01 17:54:02 +08:00
y.qiu	36392b3e34	format check	2024-08-31 18:21:00 +08:00
y.qiu	726a55725a	fix doc	2024-08-30 22:00:56 +08:00
y.qiu	338c1dcf96	static check	2024-08-30 21:53:42 +08:00
y.qiu	6dd78fbe21	support rect roi	2024-08-30 20:57:12 +08:00
y.qiu	665559d8ca	export api	2024-08-29 15:20:48 +08:00
y.qiu	8758d7801d	misc	2024-08-27 10:36:22 +08:00
y.qiu	fc4f04b9be	add loongarch simd(lsx)	2024-08-25 20:59:55 +08:00
				`@ -0,0 +1 @@`
				`1. The OpenCV intrinsics API has [changed](https://github.com/opencv/opencv/pull/24371) since 4.9; the code needs refactoring to match.`