37 Commits
v1.1 ... v1.2

Author SHA1 Message Date
6109ff388a update README.md 2024-09-19 22:47:25 +08:00
98acd6efbd misc 2024-09-19 15:50:57 +08:00
92b09a51c7 misc 2024-09-19 11:28:20 +08:00
7f5f665af2 misc 2024-09-19 11:20:33 +08:00
44cf2a0526 fix integral 2024-09-19 10:32:26 +08:00
df8267b60d simd integral image 2024-09-18 22:31:51 +08:00
01a20a18f1 static check 2024-09-18 19:53:27 +08:00
087ab9bc17 misc 2024-09-18 18:10:35 +08:00
d7bf2ad384 no copy 2024-09-18 15:45:03 +08:00
2c20dc2006 add openmp support 2024-09-18 11:21:33 +08:00
57ae2c8704 update doc 2024-09-13 20:54:55 +08:00
8e7227f74b demo: match start top level 2024-09-13 20:39:50 +08:00
c72b709178 add LICENSE 2024-09-13 20:28:04 +08:00
b5523f8491 replace call cv::Mat::ptr 2024-09-13 17:55:58 +08:00
f43f78f45f add readme 2024-09-12 21:07:31 +08:00
15372e0b60 add sample image 2024-09-12 21:06:50 +08:00
3d315445ab remove unused 2024-09-12 19:58:50 +08:00
d03818a63c special optim for arm 2024-09-12 08:36:55 +08:00
ed14c0e6f4 product hand write 2024-09-11 22:15:47 +08:00
fa20b52b85 serialize to file 2024-09-11 17:53:31 +08:00
7a88dad291 misc 2024-09-11 17:52:38 +08:00
dd5de6f6f1 misc 2024-09-11 14:30:31 +08:00
8050b8612a 🎈 perf(match): less reduce sum 2024-09-11 10:21:56 +08:00
3a689e900f revert 2024-09-06 21:19:58 +08:00
b8918b123d static check 2024-09-06 14:35:08 +08:00
1c67371cc9 fix type 2024-09-05 13:57:41 +08:00
422c12d691 impl shift 2024-09-05 11:21:55 +08:00
bece4f138f misc 2024-09-04 18:14:09 +08:00
75098c811c remove unused 2024-09-04 14:57:09 +08:00
a52deadc30 misc 2024-09-04 09:15:30 +08:00
ef1556c538 build config 2024-09-04 09:12:58 +08:00
92160ee1d9 misc 2024-09-04 09:06:42 +08:00
29fb7b001c static check 2024-09-03 22:53:25 +08:00
d85d4f7443 misc 2024-09-03 18:05:13 +08:00
13cbf06f46 aligned model at training 2024-09-03 17:03:42 +08:00
a5c8049af1 aligned model at deserialization 2024-09-03 15:27:48 +08:00
371b926fb0 cleanr name 2024-09-02 17:57:10 +08:00
15 changed files with 437 additions and 142 deletions

View File

@ -3,6 +3,18 @@ cmake_minimum_required(VERSION 3.12)
project(match)
find_package(OpenCV REQUIRED)
option(ENABLE_OPENMP "enable openmp" OFF)
if(ENABLE_OPENMP)
# find OpenMP
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif(OPENMP_FOUND)
endif(ENABLE_OPENMP)
#==============================================================
#library
#==============================================================
@ -15,13 +27,18 @@ add_library(algo SHARED
)
target_include_directories(algo PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(algo ${OpenCV_LIBRARIES})
target_link_libraries(algo ${OpenCV_LIBRARIES} $<$<BOOL:${OPENMP_FOUND}>:OpenMP::OpenMP_CXX>)
target_compile_options(algo PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -mlsx -fPIC -fvisibility=hidden -Wl,--exclude-libs,ALL -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wl,--exclude-libs,ALL -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:-mlsx>
)
target_compile_definitions(algo PUBLIC API_EXPORTS)
target_compile_definitions(algo PUBLIC API_EXPORTS
$<$<STREQUAL:${CMAKE_SYSTEM_PROCESSOR},loongarch64>:CV_LSX>
)
message("Arch:${CMAKE_SYSTEM_PROCESSOR}")
#==============================================================
#exe
@ -37,3 +54,4 @@ target_compile_options(${PROJECT_NAME} PRIVATE
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>: -fPIC -fvisibility=hidden -Wall -Wextra -Wpedantic -Wmisleading-indentation -Wunused -Wuninitialized -Wshadow -Wconversion -Werror>
$<$<AND:$<CXX_COMPILER_ID:Clang>,$<STREQUAL:${CMAKE_SYSTEM_NAME},Windows>>:/W4 /WX /external:W0>
)
target_compile_definitions(${PROJECT_NAME} PRIVATE IMG_DIR="${CMAKE_CURRENT_SOURCE_DIR}/img")

24
LICENSE Normal file
View File

@ -0,0 +1,24 @@
BSD 2-Clause License
Copyright (c) 2024, SurfaceMan
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

17
README.md Normal file
View File

@ -0,0 +1,17 @@
# Template match with gray model(ncc)
## highlights:
1. the original code is based on [Fastest_Image_Pattern_Matching](https://github.com/DennisLiu1993/Fastest_Image_Pattern_Matching); you can check out tag [v1.0](https://github.com/SurfaceMan/gray_match/releases/tag/v1.0) for more details.
2. refactored the simd match process with opencv [Universal intrinsics](https://docs.opencv.org/4.x/df/d91/group__core__hal__intrin.html); it has been tested on x86_64 (sse), arm (neon) and LoongArch (lsx).
3. support model save/load as binary file
4. provide pure c interface
5. support openmp
## usage:
all you need can be found in [main.cpp](main.cpp)
## gallery:
![sample](img/result.png)

View File

@ -141,6 +141,15 @@ int computeLayers(const int width, const int height, const int minArea) {
return layer;
}
// Apply a 2x3 affine matrix (row-major doubles, as produced by
// getRotationMatrix2D) to a single point and return the mapped point.
inline cv::Point2d transform(const cv::Point2d &point, const cv::Mat &rotate) {
    const auto  *m    = rotate.ptr<double>();
    const double mapX = m[ 0 ] * point.x + m[ 1 ] * point.y + m[ 2 ];
    const double mapY = m[ 3 ] * point.x + m[ 4 ] * point.y + m[ 5 ];
    return {mapX, mapY};
}
cv::Size computeRotationSize(const cv::Size &dstSize, const cv::Size &templateSize, double angle,
const cv::Mat &rotate) {
if (angle > 360) {
@ -157,25 +166,20 @@ cv::Size computeRotationSize(const cv::Size &dstSize, const cv::Size &templateSi
return dstSize;
}
const std::vector<cv::Point2d> points{
{0, 0},
{static_cast<double>(dstSize.width) - 1, 0},
{0, static_cast<double>(dstSize.height) - 1},
{static_cast<double>(dstSize.width) - 1, static_cast<double>(dstSize.height) - 1}};
std::vector<cv::Point2d> trans;
cv::transform(points, trans, rotate);
const std::vector<cv::Point2d> pt{
transform({0, 0}, rotate), transform({static_cast<double>(dstSize.width) - 1, 0}, rotate),
transform({0, static_cast<double>(dstSize.height) - 1}, rotate),
transform({static_cast<double>(dstSize.width) - 1, static_cast<double>(dstSize.height) - 1},
rotate)};
cv::Point2d min = trans[ 0 ];
cv::Point2d max = trans[ 0 ];
for (const auto &point : trans) {
min.x = std::min(min.x, point.x);
min.y = std::min(min.y, point.y);
max.x = std::max(max.x, point.x);
max.y = std::max(max.y, point.y);
}
cv::Point2d min;
cv::Point2d max;
min.x = std::min(std::min(std::min(pt[ 0 ].x, pt[ 1 ].x), pt[ 2 ].x), pt[ 3 ].x);
min.y = std::min(std::min(std::min(pt[ 0 ].y, pt[ 1 ].y), pt[ 2 ].y), pt[ 3 ].y);
max.x = std::max(std::max(std::max(pt[ 0 ].x, pt[ 1 ].x), pt[ 2 ].x), pt[ 3 ].x);
max.y = std::max(std::max(std::max(pt[ 0 ].y, pt[ 1 ].y), pt[ 2 ].y), pt[ 3 ].y);
if (angle > 0 && angle < 90) {
;
} else if (angle > 90 && angle < 180) {
angle -= 90;
} else if (angle > 180 && angle < 270) {
@ -199,15 +203,217 @@ cv::Size computeRotationSize(const cv::Size &dstSize, const cv::Size &templateSi
(templateSize.width > size.width && templateSize.height < size.height) ||
templateSize.area() > size.area();
if (wrongSize) {
size = {static_cast<int>(max.x - min.x + 0.5), static_cast<int>(max.y - min.y + 0.5)};
size = {static_cast<int>(lround(max.x - min.x + 0.5)),
static_cast<int>(lround(max.y - min.y + 0.5))};
}
return size;
}
void coeffDenominator(const cv::Mat &src, const cv::Size &templateSize, cv::Mat &result,
const double mean, const double normal, const double invArea,
const bool equal1) {
#ifdef CV_SIMD
// SIMD cross-correlation (cv::TM_CCORR equivalent): for every placement (x, y)
// of templateImg inside src, writes sum(template * window) into result
// (CV_32FC1, valid-region size). The column loop advances by full vector
// widths with no scalar tail and uses an aligned load on the template row, so
// it assumes template rows are zero-padded to a multiple of
// cv::v_uint8::nlanes — trainModel copies each pyramid level into such an
// aligned buffer before storing it.
void matchTemplateSimd(const cv::Mat &src, const cv::Mat &templateImg, cv::Mat &result) {
    // Valid correlation output size: src - template + (1, 1).
    result = cv::Mat(src.size() - templateImg.size() + cv::Size(1, 1), CV_32FC1);

    auto       *resultStart = result.ptr<float>();
    const auto  resultStep  = result.step[ 0 ] / result.step[ 1 ]; // elements per row
    const auto *srcStart    = src.ptr<uchar>();
    const auto  srcStep     = src.step[ 0 ]; // bytes per row (uchar data)
    const auto *temStart    = templateImg.ptr<uchar>();
    const auto  temStep     = templateImg.step[ 0 ];

    for (int y = 0; y < result.rows; y++) {
        auto *resultPtr = resultStart + resultStep * y;
        for (int x = 0; x < result.cols; x++) {
            // u32 lane-wise accumulator for one template placement.
            auto vSum = cv::vx_setall_u32(0);
            for (int templateRow = 0; templateRow < templateImg.rows; templateRow++) {
                auto *srcPtr = srcStart + srcStep * (y + templateRow) + x;
                auto *temPtr = temStart + temStep * templateRow;
                for (int i = 0; i < templateImg.cols; i += cv::v_uint8::nlanes) {
                    auto vTem = cv::v_load_aligned(temPtr + i); // template rows are aligned
                    auto vSrc = cv::v_load(srcPtr + i);         // source window may not be
#ifdef __aarch64__
                    // Manual widening u8*u8 -> u16 -> u32 accumulate; presumably
                    // faster than v_dotprod_expand codegen on aarch64 — TODO confirm.
                    cv::v_uint16 vDot1;
                    cv::v_uint16 vDot2;
                    cv::v_uint32 v1;
                    cv::v_uint32 v2;
                    cv::v_uint32 v3;
                    cv::v_uint32 v4;
                    cv::v_mul_expand(vTem, vSrc, vDot1, vDot2);
                    cv::v_expand(vDot1, v1, v2);
                    cv::v_expand(vDot2, v3, v4);
                    vSum += v1 + v2 + v3 + v4;
#else
                    // Widening dot product: pairs of u8 products summed into u32 lanes.
                    vSum += cv::v_dotprod_expand(vSrc, vTem);
#endif
                }
            }
            // Horizontal reduction of the lane accumulator to one scalar score.
            auto sum       = cv::v_reduce_sum(vSum);
            resultPtr[ x ] = static_cast<float>(sum);
        }
    }
}
// One chunk of an integral-image row: computes the inclusive prefix sum of 8
// u16 pixel lanes entirely in registers (Hillis–Steele: add self shifted by
// 1, 2, then 4 lanes; 8 * 255 fits u16 so no overflow), carries the running
// row total across chunks via the broadcast `pre`, then widens to u64,
// converts to f64 and adds the previous row's integral values from prevDst
// (standard recurrence sum(y, x) = rowPrefix + sum(y - 1, x)).
inline void integralSum(const cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
    // In-register inclusive prefix sum over the 8 u16 lanes.
    auto sum = src + cv::v_rotate_left<1>(src);
    sum += cv::v_rotate_left<2>(sum);
    sum += cv::v_rotate_left<4>(sum);

    // Widen to u32 and add the carry from preceding chunks of this row.
    cv::v_uint32 v1;
    cv::v_uint32 v2;
    cv::v_expand(sum, v1, v2);
    v1 += pre;
    v2 += pre;
    // New carry = total through the last lane, broadcast to all lanes.
    pre = cv::v_setall_u32(cv::v_extract_n<cv::v_uint32::nlanes - 1>(v2));

    // Widen u32 -> u64, reinterpret as s64 for the f64 conversion (values are
    // non-negative and well below 2^31, so the reinterpret is value-preserving),
    // add the previous integral row and store 8 doubles.
    cv::v_uint64 v3;
    cv::v_uint64 v4;
    cv::v_expand(v1, v3, v4);
    cv::v_store(dst, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v3)) + cv::v_load(prevDst));
    cv::v_store(dst + cv::v_float64::nlanes, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v4)) +
                                                 cv::v_load(prevDst + cv::v_float64::nlanes));
    cv::v_expand(v2, v3, v4);
    cv::v_store(dst + cv::v_float64::nlanes * 2,
                cv::v_cvt_f64(cv::v_reinterpret_as_s64(v3)) +
                    cv::v_load(prevDst + cv::v_float64::nlanes * 2));
    cv::v_store(dst + cv::v_float64::nlanes * 3,
                cv::v_cvt_f64(cv::v_reinterpret_as_s64(v4)) +
                    cv::v_load(prevDst + cv::v_float64::nlanes * 3));
}
// Square-sum counterpart of integralSum for one 8-lane u16 chunk whose lanes
// already hold squared pixel values (255^2 fits u16). Because squared values
// would overflow a u16 prefix sum, the lanes are widened to two u32 halves
// first and the Hillis–Steele prefix sum runs on the halves: distance-1 and
// distance-2 adds per half (v_extract<2>(v1, v2) supplies the cross-boundary
// shift for the high half), then the cross-half step adds the low half's
// window sums lane-wise onto the high half, with `pre` carrying the running
// row total across chunks. Output handling matches integralSum: widen to u64,
// convert to f64 and accumulate with the previous integral row.
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
    // Widen the 8 squared lanes into two u32 halves: v1 = lanes 0..3, v2 = lanes 4..7.
    cv::v_uint32 v1;
    cv::v_uint32 v2;
    cv::v_expand(src, v1, v2);
    {
        // Distance-1 step via a lane shift of the original u16 vector.
        auto         shift1 = cv::v_rotate_left<1>(src);
        cv::v_uint32 v3;
        cv::v_uint32 v4;
        cv::v_expand(shift1, v3, v4);
        v1 += v3;
        v2 += v4;
        // Distance-2 step for the high half: lanes shifted by 2 across the
        // logical 8-lane register come from the tail of v1 plus head of v2.
        v4 = cv::v_extract<2>(v1, v2);
        v2 += v4;
        // Distance-2 step for the low half (zeros shift in).
        v3 = cv::v_rotate_left<2>(v1);
        v1 += v3;
        // Add the running carry, then complete the high half: each high lane
        // gains the matching low-lane window sum, yielding full prefixes.
        v1 += pre;
        v2 += v1;
        // New carry = row total through the last lane, broadcast.
        pre = cv::v_setall_u32(cv::v_extract_n<cv::v_uint32::nlanes - 1>(v2));
    }
    // Widen to u64, reinterpret as s64 for f64 conversion (values are
    // non-negative), add the previous integral row and store 8 doubles.
    cv::v_uint64 v3;
    cv::v_uint64 v4;
    cv::v_expand(v1, v3, v4);
    cv::v_store(dst, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v3)) + cv::v_load(prevDst));
    cv::v_store(dst + cv::v_float64::nlanes, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v4)) +
                                                 cv::v_load(prevDst + cv::v_float64::nlanes));
    cv::v_expand(v2, v3, v4);
    cv::v_store(dst + cv::v_float64::nlanes * 2,
                cv::v_cvt_f64(cv::v_reinterpret_as_s64(v3)) +
                    cv::v_load(prevDst + cv::v_float64::nlanes * 2));
    cv::v_store(dst + cv::v_float64::nlanes * 3,
                cv::v_cvt_f64(cv::v_reinterpret_as_s64(v4)) +
                    cv::v_load(prevDst + cv::v_float64::nlanes * 3));
}
/*
inline void integralSqSum(cv::v_uint32 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
src += cv::v_rotate_left<1>(src);
src += cv::v_rotate_left<2>(src);
src += pre;
pre = cv::v_setall_u32(cv::v_extract_n<cv::v_uint32::nlanes - 1>(src));
cv::v_uint64 v1;
cv::v_uint64 v2;
cv::v_expand(src, v1, v2);
cv::v_store(dst, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v1)) + cv::v_load(prevDst));
cv::v_store(dst + cv::v_float64::nlanes, cv::v_cvt_f64(cv::v_reinterpret_as_s64(v2)) +
cv::v_load(prevDst + cv::v_float64::nlanes));
}
inline void integralSqSum(cv::v_uint16 &src, double *dst, double *prevDst, cv::v_uint32 &pre) {
cv::v_uint32 v1;
cv::v_uint32 v2;
cv::v_expand(src, v1, v2);
integralSqSum(v1, dst, prevDst, pre);
integralSqSum(v2, dst + cv::v_uint32::nlanes, prevDst + cv::v_uint32::nlanes, pre);
}
*/
// Run the row-prefix accumulator over both expanded u16 halves of one 8-bit
// chunk. The low half goes first so the running carry `pre` it produces
// feeds into the high half.
inline void integralSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
                        cv::v_uint32 &pre) {
    const int half = cv::v_uint16::nlanes;
    integralSum(v1, dst, prevDst, pre);
    integralSum(v2, dst + half, prevDst + half, pre);
}
// Square both u16 halves in place (255 * 255 = 65025 still fits 16 bits, so
// v_mul_wrap is exact here) and feed them through the square-sum prefix
// accumulator; low half first so the carry `pre` chains into the high half.
inline void integralSqSum(cv::v_uint16 &v1, cv::v_uint16 &v2, double *dst, double *prevDst,
                          cv::v_uint32 &pre) {
    const int half = cv::v_uint16::nlanes;
    v1             = cv::v_mul_wrap(v1, v1);
    v2             = cv::v_mul_wrap(v2, v2);
    integralSqSum(v1, dst, prevDst, pre);
    integralSqSum(v2, dst + half, prevDst + half, pre);
}
// SIMD replacement for cv::integral(src, sum, sqSum, CV_64F): builds
// (rows + 1) x (cols + 1) CV_64F integral and squared-integral images with a
// zero top row and left column. Both outputs are memset to zero up front,
// which provides the zero borders; interior cells are then overwritten.
void integralSimd(const cv::Mat &src, cv::Mat &sum, cv::Mat &sqSum) {
    const auto size = src.size() + cv::Size(1, 1);
    sum.create(size, CV_64FC1);
    sqSum.create(size, CV_64FC1);
    // Zero everything so the border row/column (never written below) stay 0.
    memset(sum.data, 0, sum.rows * sum.step[ 0 ]);
    memset(sqSum.data, 0, sqSum.rows * sqSum.step[ 0 ]);

    const auto *srcStart = src.ptr<uchar>();
    const auto  srcStep  = src.step[ 0 ];
    // Start pointers at cell (1, 1): row y of src maps to output row y + 1,
    // column x to output column x + 1.
    auto      *sumStart   = sum.ptr<double>(1) + 1;
    const auto sumStep    = sum.step[ 0 ] / sum.step[ 1 ]; // elements per row
    auto      *sqSumStart = sqSum.ptr<double>(1) + 1;
    const auto sqSumStep  = sqSum.step[ 0 ] / sqSum.step[ 1 ];

    for (int y = 0; y < src.rows; y++) {
        auto *srcPtr   = srcStart + srcStep * y;
        auto *sumPtr   = sumStart + sumStep * y;
        auto *sqSumPtr = sqSumStart + sqSumStep * y;
        // For y == 0 these point into the zeroed border row of the outputs,
        // so the recurrence needs no special first-row case.
        auto *preSumPtr   = sumStart + sumStep * (y - 1);
        auto *preSqSumPtr = sqSumStart + sqSumStep * (y - 1);

        // Broadcast running row totals carried between vector chunks.
        cv::v_uint32 prevSum   = cv::vx_setzero_u32();
        cv::v_uint32 prevSqSum = cv::vx_setzero_u32();

        // Vector body: whole u8 lanes at a time.
        int x = 0;
        for (; x < src.cols - cv::v_uint8::nlanes; x += cv::v_uint8::nlanes) {
            cv::v_uint16 v1;
            cv::v_uint16 v2;
            cv::v_expand(cv::v_load(srcPtr + x), v1, v2);
            integralSum(v1, v2, sumPtr + x, preSumPtr + x, prevSum);
            integralSqSum(v1, v2, sqSumPtr + x, preSqSumPtr + x, prevSqSum);
        }

        // Scalar tail: continue from the carry left in the vector accumulators
        // (all lanes hold the same running total, so extracting the last works).
        auto prevSum2   = cv::v_extract_n<cv::v_uint32::nlanes - 1>(prevSum);
        auto prevSqSum2 = cv::v_extract_n<cv::v_uint32::nlanes - 1>(prevSqSum);
        for (; x < src.cols; x++) {
            auto val   = srcPtr[ x ];
            auto sqVal = val * val;
            prevSum2 += val;
            sumPtr[ x ] = prevSum2 + preSumPtr[ x ];
            prevSqSum2 += sqVal;
            sqSumPtr[ x ] = prevSqSum2 + preSqSumPtr[ x ];
        }
    }
}
#endif
void ccoeffDenominator(const cv::Mat &src, const cv::Size &templateSize, cv::Mat &result,
const double mean, const double normal, const double invArea,
const bool equal1) {
if (equal1) {
result = cv::Scalar::all(1);
return;
@ -215,7 +421,11 @@ void coeffDenominator(const cv::Mat &src, const cv::Size &templateSize, cv::Mat
cv::Mat sum;
cv::Mat sqSum;
#ifdef CV_SIMD
integralSimd(src, sum, sqSum);
#else
cv::integral(src, sum, sqSum, CV_64F);
#endif
const auto *q0 = sqSum.ptr<double>(0);
const auto *q1 = q0 + templateSize.width;
@ -235,23 +445,19 @@ void coeffDenominator(const cv::Mat &src, const cv::Size &templateSize, cv::Mat
for (int x = 0; x < result.cols; x++, idx++) {
auto &score = scorePtr[ x ];
const auto partSum = p0[ idx ] - p1[ idx ] - p2[ idx ] + p3[ idx ];
auto partMean = partSum * partSum;
const auto num = score - partSum * mean;
partMean *= invArea;
const auto numerator = score - partSum * mean;
auto partSum2 = q0[ idx ] - q1[ idx ] - q2[ idx ] + q3[ idx ];
auto partSqSum = q0[ idx ] - q1[ idx ] - q2[ idx ] + q3[ idx ];
auto partSqNormal = partSqSum - partSum * partSum * invArea;
const auto diff = std::max(partSum2 - partMean, 0.);
if (diff <= std::min(0.5, 10 * FLT_EPSILON * partSum2)) {
partSum2 = 0;
} else {
partSum2 = sqrt(diff) * normal;
}
const auto diff = std::max(partSqNormal, 0.);
double denominator =
(diff <= std::min(0.5, 10 * FLT_EPSILON * partSqSum)) ? 0 : sqrt(diff) * normal;
if (abs(num) < partSum2) {
score = static_cast<float>(num / partSum2);
} else if (abs(num) < partSum2 * 1.125) {
score = num > 0.f ? 1.f : -1.f;
if (abs(numerator) < denominator) {
score = static_cast<float>(numerator / denominator);
} else if (abs(numerator) < denominator * 1.125) {
score = numerator > 0.f ? 1.f : -1.f;
} else {
score = 0;
}
@ -259,48 +465,14 @@ void coeffDenominator(const cv::Mat &src, const cv::Size &templateSize, cv::Mat
}
}
#ifdef CV_SIMD
// Dot product of kernel and src over kernelWidth bytes: widening SIMD
// accumulation for full vector chunks, then a scalar loop for the tail.
float convSimd(const uchar *kernel, const uchar *src, const int kernelWidth) {
    const int step = cv::v_uint8::nlanes;
    auto      acc  = cv::vx_setall_u32(0);

    int pos = 0;
    while (pos < kernelWidth - step) {
        acc += cv::v_dotprod_expand(cv::v_load(kernel + pos), cv::v_load(src + pos));
        pos += step;
    }

    // Reduce the lane accumulator, then finish the remaining bytes scalar.
    auto total = cv::v_reduce_sum(acc);
    while (pos < kernelWidth) {
        total += kernel[ pos ] * src[ pos ];
        ++pos;
    }
    return static_cast<float>(total);
}
// Cross-correlation of templateImg over src (cv::TM_CCORR equivalent):
// each output cell accumulates per-row dot products computed by convSimd.
void matchTemplateSimd(const cv::Mat &src, const cv::Mat &templateImg, cv::Mat &result) {
    const auto outSize = src.size() - templateImg.size() + cv::Size(1, 1);
    result             = cv::Mat::zeros(outSize, CV_32FC1);

    for (int row = 0; row < result.rows; row++) {
        auto *dstRow = result.ptr<float>(row);
        for (int col = 0; col < result.cols; col++) {
            for (int tRow = 0; tRow < templateImg.rows; tRow++) {
                const auto *winPtr = src.ptr<uchar>(row + tRow) + col;
                const auto *kerPtr = templateImg.ptr<uchar>(tRow);
                dstRow[ col ] += convSimd(kerPtr, winPtr, templateImg.cols);
            }
        }
    }
}
#endif
void matchTemplate(cv::Mat &src, cv::Mat &result, const Model *model, int level) {
#ifdef CV_SIMD
matchTemplateSimd(src, model->pyramids[ level ], result);
#else
cv::matchTemplate(src, model->pyramids[ level ], result, cv::TM_CCORR);
#endif
coeffDenominator(src, model->pyramids[ level ].size(), result, model->mean[ level ][ 0 ],
model->normal[ level ], model->invArea[ level ], model->equal1[ level ] == 1);
ccoeffDenominator(src, model->pyramids[ level ].size(), result, model->mean[ level ][ 0 ],
model->normal[ level ], model->invArea[ level ], model->equal1[ level ] == 1);
}
void nextMaxLoc(const cv::Point &pos, const cv::Size templateSize, const double maxOverlap,
@ -334,17 +506,25 @@ void nextMaxLoc(cv::Mat &score, const cv::Point &pos, const cv::Size templateSiz
cv::minMaxLoc(score, nullptr, &maxScore, nullptr, &maxPos);
}
inline cv::Point2d transform(const cv::Point2d &point, const cv::Mat &rotate) {
const auto ptr = rotate.ptr<double>();
inline cv::Mat getRotationMatrix2D(const cv::Point2f &center, double angle) {
angle *= CV_PI / 180;
double alpha = std::cos(angle);
double beta = std::sin(angle);
auto x = point.x * ptr[ 0 ] + point.y * ptr[ 1 ] + ptr[ 2 ];
auto y = point.x * ptr[ 3 ] + point.y * ptr[ 4 ] + ptr[ 5 ];
cv::Mat rotate(2, 3, CV_64FC1);
auto ptr = rotate.ptr<double>();
ptr[ 0 ] = alpha;
ptr[ 1 ] = beta;
ptr[ 2 ] = (1 - alpha) * center.x - beta * center.y;
ptr[ 3 ] = -beta;
ptr[ 4 ] = alpha;
ptr[ 5 ] = beta * center.x + (1 - alpha) * center.y;
return {x, y};
return rotate;
}
inline cv::Point2d transform(const cv::Point2d &point, const cv::Point &center, double angle) {
const auto rotate = cv::getRotationMatrix2D(center, angle, 1.);
const auto rotate = getRotationMatrix2D(center, angle);
return transform(point, rotate);
}
@ -354,14 +534,13 @@ inline cv::Point2d sizeCenter(const cv::Size &size) {
}
void cropRotatedRoi(const cv::Mat &src, const cv::Size &templateSize, const cv::Point2d topLeft,
const cv::Mat &rotate, cv::Mat &roi) {
cv::Mat &rotate, cv::Mat &roi) {
const auto point = transform(topLeft, rotate);
const cv::Size paddingSize(templateSize.width + 6, templateSize.height + 6);
auto rt = rotate;
rt.at<double>(0, 2) -= point.x - 3;
rt.at<double>(1, 2) -= point.y - 3;
rotate.at<double>(0, 2) -= point.x - 3;
rotate.at<double>(1, 2) -= point.y - 3;
cv::warpAffine(src, roi, rt, paddingSize);
cv::warpAffine(src, roi, rotate, paddingSize);
}
void filterOverlap(std::vector<Candidate> &candidates, const std::vector<cv::RotatedRect> &rects,
@ -431,11 +610,24 @@ Model *trainModel(const cv::Mat &src, int level) {
auto *result = new Model;
Model &model = *result;
cv::buildPyramid(src, model.pyramids, level);
model.borderColor = cv::mean(src).val[ 0 ] < 128 ? 255 : 0;
model.reserve(model.pyramids.size());
for (const auto &pyramid : model.pyramids) {
std::vector<cv::Mat> pyramids;
cv::buildPyramid(src, pyramids, level);
model.borderColor = cv::mean(src).val[ 0 ] < 128 ? 255 : 0;
model.reserve(pyramids.size());
for (const auto &pyramid : pyramids) {
int alignedWidth = static_cast<int>(cv::alignSize(pyramid.cols, cv::v_uint8::nlanes));
auto img = cv::Mat::zeros(pyramid.rows, alignedWidth, CV_8UC1);
cv::Mat sub = img(cv::Rect(0, 0, pyramid.cols, pyramid.rows));
for (int y = 0; y < pyramid.rows; y++) {
auto *dstPtr = sub.ptr<uchar>(y);
auto *srcPtr = pyramid.ptr<uchar>(y);
memcpy(dstPtr, srcPtr, pyramid.cols);
}
model.pyramids.push_back(sub);
auto invArea = 1. / pyramid.size().area();
cv::Scalar mean;
@ -460,6 +652,9 @@ inline double sizeAngleStep(const cv::Size &size) {
return atan(2. / std::max(size.width, size.height)) * 180. / CV_PI;
}
#pragma omp declare reduction(combine : std::vector<Candidate> : omp_out.insert( \
omp_out.end(), omp_in.begin(), omp_in.end()))
std::vector<Candidate> matchTopLevel(const cv::Mat &dstTop, double startAngle, double spanAngle,
double maxOverlap, double minScore, int maxCount,
const Model *model, int level) {
@ -472,9 +667,10 @@ std::vector<Candidate> matchTopLevel(const cv::Mat &dstTop, double startAngle, d
bool calMaxByBlock = (dstTop.size().area() / templateTop.size().area() > 500) && maxCount > 10;
const auto count = static_cast<int>(spanAngle / angleStep) + 1;
#pragma omp parallel for reduction(combine : candidates)
for (int i = 0; i < count; i++) {
const auto angle = startAngle + angleStep * i;
auto rotate = cv::getRotationMatrix2D(center, angle, 1.);
auto rotate = getRotationMatrix2D(center, angle);
auto size = computeRotationSize(dstTop.size(), templateTop.size(), angle, rotate);
auto tx = (size.width - 1) / 2. - center.x;
@ -536,9 +732,11 @@ std::vector<Candidate> matchDownLevel(const std::vector<cv::Mat> &pyramids,
const std::vector<Candidate> &candidates, double minScore,
int subpixel, const Model *model, int level) {
std::vector<Candidate> levelMatched;
auto count = static_cast<int>(candidates.size());
for (const auto &candidate : candidates) {
auto pose = candidate;
#pragma omp parallel for reduction(combine : levelMatched)
for (int index = 0; index < count; index++) {
auto pose = candidates[ index ];
bool matched = true;
for (int currentLevel = level - 1; currentLevel >= 0; currentLevel--) {
const auto &currentTemplateImg = model->pyramids[ currentLevel ];
@ -560,7 +758,7 @@ std::vector<Candidate> matchDownLevel(const std::vector<cv::Mat> &pyramids,
cv::Mat newScoreRect;
for (int i = -1; i <= 1; i++) {
auto angle = pose.angle + i * currentAngleStep;
auto rotate = cv::getRotationMatrix2D(center, angle, 1.);
auto rotate = getRotationMatrix2D(center, angle);
cv::Mat roi;
cropRotatedRoi(currentDstImg, tmpSize, topLeft, rotate, roi);
@ -663,7 +861,7 @@ std::vector<Pose> matchModel(const cv::Mat &dst, const Model *model, int level,
for (const auto &candidate : levelMatched) {
std::vector<cv::Point2f> points{topRight + cv::Point2f(candidate.pos),
bottomRight + cv::Point2f(candidate.pos)};
auto rotate = cv::getRotationMatrix2D(candidate.pos, -candidate.angle, 1.);
auto rotate = getRotationMatrix2D(candidate.pos, -candidate.angle);
std::vector<cv::Point2f> rotatedPoints;
cv::transform(points, rotatedPoints, rotate);
@ -695,14 +893,14 @@ std::vector<Pose> matchModel(const cv::Mat &dst, const Model *model, int level,
return result;
}
Model_t trainModel(const unsigned char *data, int width, int height, int channels, int bytesPerline,
Model_t trainModel(const unsigned char *data, int width, int height, int channels, int bytesPerLine,
int roiLeft, int roiTop, int roiWidth, int roiHeight, int levelNum) {
if ((1 != channels && 3 == channels && 4 == channels) || nullptr == data) {
if ((1 != channels && 3 != channels && 4 != channels) || nullptr == data) {
return nullptr;
}
auto type = channels == 1 ? CV_8UC1 : (channels == 3 ? CV_8UC3 : CV_8UC4);
cv::Mat img(cv::Size(width, height), type, (void *)data, bytesPerline);
cv::Mat img(cv::Size(width, height), type, const_cast<unsigned char *>(data), bytesPerLine);
cv::Mat src;
if (1 == channels) {
@ -721,7 +919,7 @@ Model_t trainModel(const unsigned char *data, int width, int height, int channel
return trainModel(src(roi), levelNum);
}
void matchModel(const unsigned char *data, int width, int height, int channels, int bytesPerline,
void matchModel(const unsigned char *data, int width, int height, int channels, int bytesPerLine,
int roiLeft, int roiTop, int roiWidth, int roiHeight, const Model_t model,
int *count, Pose *poses, int level, double startAngle, double spanAngle,
double maxOverlap, double minScore, int subpixel) {
@ -740,7 +938,7 @@ void matchModel(const unsigned char *data, int width, int height, int channels,
}
auto type = channels == 1 ? CV_8UC1 : (channels == 3 ? CV_8UC3 : CV_8UC4);
cv::Mat img(cv::Size(width, height), type, (void *)data, bytesPerline);
cv::Mat img(cv::Size(width, height), type, const_cast<unsigned char *>(data), bytesPerLine);
cv::Mat dst;
if (1 == channels) {
@ -779,7 +977,7 @@ void freeModel(Model_t *model) {
*model = nullptr;
}
int modelLevel(const Model_t model) {
int modelLevel(const Model *model) {
if (nullptr == model) {
return 0;
}
@ -787,7 +985,7 @@ int modelLevel(const Model_t model) {
return static_cast<int>(model->pyramids.size());
}
void modelImage(const Model_t model, int level, unsigned char *data, int length, int *width,
void modelImage(const Model *model, int level, unsigned char *data, int length, int *width,
int *height, int *channels) {
if (nullptr == model) {
return;

View File

@ -20,16 +20,16 @@ struct Pose {
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerline bytes per line
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
* @param roiHeight rectangle roi height
* @param levelNum pyramid levels (> 0)
* @param levelNum pyramid levels (> 0:user setting,-1:auto)
* @return
*/
API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height, int channels,
int bytesPerline, int roiLeft, int roiTop, int roiWidth,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth,
int roiHeight, int levelNum);
/**
* @brief match model
@ -37,7 +37,7 @@ API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height,
* @param width image width
* @param height image height
* @param channels image channels 1(gray)/3(rgb)/4(rgba)
* @param bytesPerline bytes per line
* @param bytesPerLine bytes per line
* @param roiLeft rectangle roi left
* @param roiTop rectangle roi top
* @param roiWidth rectangle roi width
@ -45,7 +45,7 @@ API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height,
* @param model trained model
* @param count in(max detect count)/out(found count)
* @param poses pose array inited with size not less than count
* @param level match start at which level (level>=0 && level<modelLevel-1)
* @param level match start at which level (level>=0 && level<modelLevel-1,-1:auto)
* @param startAngle rotation start angle
* @param spanAngle rotation angle range
* @param maxOverlap overlap threshold
@ -54,7 +54,7 @@ API_PUBLIC Model_t trainModel(const unsigned char *data, int width, int height,
* @return
*/
API_PUBLIC void matchModel(const unsigned char *data, int width, int height, int channels,
int bytesPerline, int roiLeft, int roiTop, int roiWidth, int roiHeight,
int bytesPerLine, int roiLeft, int roiTop, int roiWidth, int roiHeight,
const Model_t model, int *count, Pose *poses, int level,
double startAngle, double spanAngle, double maxOverlap, double minScore,
int subpixel);
@ -97,7 +97,7 @@ API_PUBLIC void freeModel(Model_t *model);
API_PUBLIC bool serialize(const Model_t model, unsigned char *buffer, int *size);
/**
* @brief desrialize model
* @brief deserialize model
* @param buffer
* @param size buffer size
* @return model

BIN
img/3.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

BIN
img/h.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/i.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/j.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/k.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/l.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 KiB

BIN
img/result.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 374 KiB

View File

@ -1,39 +1,70 @@
#include "grayMatch.h"
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
int main() {
auto src =
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3.bmp", cv::IMREAD_GRAYSCALE);
auto dst =
cv::imread("C:/Users/qiuyong/Desktop/test/template/model3_src1.bmp", cv::IMREAD_GRAYSCALE);
auto src = cv::imread(std::string(IMG_DIR) + "/3.bmp", cv::IMREAD_GRAYSCALE);
auto dst = cv::imread(std::string(IMG_DIR) + "/h.bmp", cv::IMREAD_GRAYSCALE);
auto t0 = cv::getTickCount();
auto model = trainModel(src.data, src.cols, src.rows, src.channels(), int(src.step), 0, 0,
src.cols, src.rows, -1);
auto t1 = cv::getTickCount();
const std::string modelName("model.bin");
{
auto t0 = cv::getTickCount();
auto model = trainModel(src.data, src.cols, src.rows, src.channels(), int(src.step), 0, 0,
src.cols, src.rows, -1);
auto t1 = cv::getTickCount();
int size;
serialize(model, nullptr, &size);
std::vector<uchar> buffer(size);
serialize(model, buffer.data(), &size);
// get size
int size;
serialize(model, nullptr, &size);
freeModel(&model);
// serialize to buffer
std::vector<uchar> buffer(size);
serialize(model, buffer.data(), &size);
model = deserialize(buffer.data(), int(buffer.size()));
// write to file
std::ofstream ofs(modelName, std::ios::binary | std::ios::out);
if (!ofs.is_open()) {
return -1;
}
ofs.write((const char *)buffer.data(), size);
freeModel(&model);
auto trainCost = double(t1 - t0) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << std::endl;
}
int num = 70;
std::vector<Pose> poses(num);
{
// open file
std::ifstream ifs(modelName, std::ios::binary | std::ios::in);
if (!ifs.is_open()) {
return -2;
}
auto t2 = cv::getTickCount();
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), int(dst.step), 0, 0, dst.cols,
dst.rows, model, &num, poses.data(), -1, 0, 360, 0, 0.5, 1);
auto t3 = cv::getTickCount();
// get size
ifs.seekg(0, std::ios::end);
auto size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
auto trainCost = double(t1 - t0) / cv::getTickFrequency();
auto matchCost = double(t3 - t2) / cv::getTickFrequency();
std::cout << "train(s):" << trainCost << " match(s):" << matchCost << std::endl;
// read to buffer
std::vector<uchar> buffer(size);
ifs.read((char *)buffer.data(), size);
// deserialize from buffer
auto model = deserialize(buffer.data(), int(buffer.size()));
auto t2 = cv::getTickCount();
matchModel(dst.data, dst.cols, dst.rows, dst.channels(), int(dst.step), 0, 0, dst.cols,
dst.rows, model, &num, poses.data(), -1, 0, 360, 0, 0.5, 1);
auto t3 = cv::getTickCount();
auto matchCost = double(t3 - t2) / cv::getTickFrequency();
std::cout << "match(s):" << matchCost << std::endl;
}
cv::Mat color;
cv::cvtColor(dst, color, cv::COLOR_GRAY2RGB);

View File

@ -26,6 +26,7 @@ struct Model {
}
void reserve(const std::size_t size) {
pyramids.reserve(size);
normal.reserve(size);
invArea.reserve(size);
mean.reserve(size);

View File

@ -1,12 +1,16 @@
#include "grayMatch.h"
#include "privateType.h"
#include <opencv2/core/hal/intrin.hpp>
class Buffer {
public:
Buffer(int size_, unsigned char *data_)
: m_size(size_)
, m_data(data_) {}
virtual ~Buffer() = default;
virtual void operator&(uchar &val) = 0;
virtual void operator&(std::vector<cv::Mat> &val) = 0;
virtual void operator&(std::vector<cv::Scalar> &val) = 0;
@ -22,7 +26,7 @@ public:
this->operator&(val.borderColor);
}
int count() const {
[[nodiscard]] int count() const {
return m_size;
}
@ -31,11 +35,6 @@ protected:
unsigned char *m_data = nullptr;
};
class WriteOperationBase {
public:
virtual void write(void *dst, void *src, int size) = 0;
};
// Raw-byte copy primitive used by the binary serialization buffers:
// copies `size` bytes from src to dst.
void binWrite(void *dst, void *src, int size) {
    auto *out = static_cast<unsigned char *>(dst);
    auto *in  = static_cast<const unsigned char *>(src);
    memcpy(out, in, static_cast<size_t>(size));
}
@ -50,7 +49,7 @@ using Write = void (*)(void *, void *, int);
template <Write write> class OutBuffer : public Buffer {
public:
OutBuffer(unsigned char *data_)
explicit OutBuffer(unsigned char *data_)
: Buffer(0, data_) {}
void operator&(uchar &val) final {
@ -114,7 +113,7 @@ using WriteBuffer = OutBuffer<binWrite>;
class ReadBuffer : public Buffer {
public:
ReadBuffer(unsigned char *data_)
explicit ReadBuffer(unsigned char *data_)
: Buffer(0, data_) {}
void operator&(uchar &val) final {
@ -140,8 +139,15 @@ public:
memcpy(&height, m_data + m_size, sizeof(int));
m_size += static_cast<int>(sizeof(int));
val = cv::Mat(cv::Size(width, height), CV_8UC1, m_data + m_size);
m_size += width * height;
int alignedWidth = static_cast<int>(cv::alignSize(width, cv::v_uint8::nlanes));
auto img = cv::Mat::zeros(height, alignedWidth, CV_8UC1);
val = img(cv::Rect(0, 0, width, height));
for (int y = 0; y < height; y++) {
auto *ptr = val.ptr<uchar>(y);
memcpy(ptr, m_data + m_size, width);
m_size += width;
}
}
void operator&(std::vector<cv::Scalar> &val) final {
int count = 0;
@ -191,15 +197,15 @@ bool serialize(const Model_t model, unsigned char *buffer, int *size) {
return false;
}
SizeCountBuffer countor(buffer);
operation(&countor, *model);
*size = countor.count();
SizeCountBuffer counter(buffer);
operation(&counter, *model);
*size = counter.count();
if (nullptr == buffer) {
return true;
}
if (countor.count() > *size) {
if (counter.count() > *size) {
*size = 0;
return false;
}