From aa4b54952cd408c36f676d62f8fc6801c614359c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 16 Apr 2025 14:05:46 +0800
Subject: [PATCH] branch-2.1: [enhancement]Optimize GeoFunctions for const columns #34396 (#50067)

Cherry-picked from #34396

Co-authored-by: koarz <66543806+koarz@users.noreply.github.com>
---
Review notes (free-form text placed after the separator; not part of the commit message):

  * StPoint::vector_const is the branch taken when only the right argument is constant.
    It read the per-row x coordinate from right_column (the constant argument) instead
    of left_column, so every row used the wrong x and rows past 0 indexed beyond the
    constant. It now reads left_column. Hunk line counts and the diffstat are unchanged,
    but the post-image blob id on the functions_geo.cpp "index" line no longer matches
    this content and should be regenerated.
  * This copy was re-flowed from a whitespace-collapsed extraction. Indentation, the
    position of blank context lines, and template argument lists (get<Float64>,
    shared_ptr<GeoShape>, unique_ptr<GeoShape>, const_cast<const char*>,
    check_function<..., true>) were reconstructed. They agree with the hunk headers,
    but TODO: confirm them against upstream #34396 before applying.

 be/src/vec/functions/functions_geo.cpp     | 248 +++++++++++++++------
 be/test/vec/function/function_geo_test.cpp | 157 ++++++++-----
 2 files changed, 289 insertions(+), 116 deletions(-)

diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp
index ac6969c582..172f000928 100644
--- a/be/src/vec/functions/functions_geo.cpp
+++ b/be/src/vec/functions/functions_geo.cpp
@@ -44,33 +44,70 @@ struct StPoint {
         DCHECK_EQ(arguments.size(), 2);
         auto return_type = block.get_data_type(result);
 
-        auto column_x =
-                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto column_y =
-                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
 
-        const auto size = column_x->size();
+        const auto size = std::max(left_column->size(), right_column->size());
 
         MutableColumnPtr res = return_type->create_column();
 
         GeoPoint point;
         std::string buf;
-        for (int row = 0; row < size; ++row) {
-            auto cur_res = point.from_coord(column_x->operator[](row).get<Float64>(),
-                                            column_y->operator[](row).get<Float64>());
-            if (cur_res != GEO_PARSE_OK) {
-                res->insert_data(nullptr, 0);
-                continue;
-            }
-
-            buf.clear();
-            point.encode_to(&buf);
-            res->insert_data(buf.data(), buf.size());
+        if (left_const) {
+            const_vector(left_column, right_column, res, size, point, buf);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size, point, buf);
+        } else {
+            vector_vector(left_column, right_column, res, size, point, buf);
         }
 
         block.replace_by_position(result, std::move(res));
         return Status::OK();
     }
+
+    static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res, GeoPoint& point,
+                        std::string& buf) {
+        if (cur_res != GEO_PARSE_OK) {
+            res->insert_data(nullptr, 0);
+            return;
+        }
+
+        buf.clear();
+        point.encode_to(&buf);
+        res->insert_data(buf.data(), buf.size());
+    }
+
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, const size_t size, GeoPoint& point,
+                             std::string& buf) {
+        double x = left_column->operator[](0).get<Float64>();
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = point.from_coord(x, right_column->operator[](row).get<Float64>());
+            loop_do(cur_res, res, point, buf);
+        }
+    }
+
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, const size_t size, GeoPoint& point,
+                             std::string& buf) {
+        double y = right_column->operator[](0).get<Float64>();
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(), y);
+            loop_do(cur_res, res, point, buf);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                              MutableColumnPtr& res, const size_t size, GeoPoint& point,
+                              std::string& buf) {
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(),
+                                            right_column->operator[](row).get<Float64>());
+            loop_do(cur_res, res, point, buf);
+        }
+    }
 };
 
 struct StAsTextName {
@@ -304,38 +341,79 @@ struct StAzimuth {
         auto return_type = block.get_data_type(result);
         MutableColumnPtr res = return_type->create_column();
 
-        auto p1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto p2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
-        const auto size = p1->size();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        const auto size = std::max(left_column->size(), right_column->size());
 
         GeoPoint point1;
         GeoPoint point2;
-
-        for (int row = 0; row < size; ++row) {
-            auto shape_value1 = p1->get_data_at(row);
-            auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
-            if (!pt1) {
-                res->insert_default();
-                continue;
-            }
-
-            auto shape_value2 = p2->get_data_at(row);
-            auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
-            if (!pt2) {
-                res->insert_default();
-                continue;
-            }
-
-            double angle = 0;
-            if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
-                res->insert_default();
-                continue;
-            }
-            res->insert_data(const_cast<const char*>((char*)&angle), 0);
+        if (left_const) {
+            const_vector(left_column, right_column, res, size, point1, point2);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size, point1, point2);
+        } else {
+            vector_vector(left_column, right_column, res, size, point1, point2);
         }
         block.replace_by_position(result, std::move(res));
         return Status::OK();
     }
+
+    static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint& point2,
+                        MutableColumnPtr& res) {
+        if (!(pt1 && pt2)) {
+            res->insert_default();
+            return;
+        }
+
+        double angle = 0;
+        if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
+            res->insert_default();
+            return;
+        }
+        res->insert_data(const_cast<const char*>((char*)&angle), 0);
+    }
+
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, size_t size, GeoPoint& point1,
+                             GeoPoint& point2) {
+        auto shape_value1 = left_column->get_data_at(0);
+        auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
+        for (int row = 0; row < size; ++row) {
+            auto shape_value2 = right_column->get_data_at(row);
+            auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
+
+            loop_do(pt1, pt2, point1, point2, res);
+        }
+    }
+
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, size_t size, GeoPoint& point1,
+                             GeoPoint& point2) {
+        auto shape_value2 = right_column->get_data_at(0);
+        auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
+        for (int row = 0; row < size; ++row) {
+            auto shape_value1 = left_column->get_data_at(row);
+            auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
+
+            loop_do(pt1, pt2, point1, point2, res);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                              MutableColumnPtr& res, size_t size, GeoPoint& point1,
+                              GeoPoint& point2) {
+        for (int row = 0; row < size; ++row) {
+            auto shape_value1 = left_column->get_data_at(row);
+            auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
+            auto shape_value2 = right_column->get_data_at(row);
+            auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
+
+            loop_do(pt1, pt2, point1, point2, res);
+        }
+    }
 };
 
 struct StAreaSquareMeters {
@@ -463,36 +541,78 @@ struct StContains {
                           size_t result) {
         DCHECK_EQ(arguments.size(), 2);
         auto return_type = block.get_data_type(result);
-        auto shape1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto shape2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        const auto size = std::max(left_column->size(), right_column->size());
 
-        const auto size = shape1->size();
         MutableColumnPtr res = return_type->create_column();
 
-        int i;
-        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
-        for (int row = 0; row < size; ++row) {
-            auto lhs_value = shape1->get_data_at(row);
-            auto rhs_value = shape2->get_data_at(row);
-            StringRef* strs[2] = {&lhs_value, &rhs_value};
-            for (i = 0; i < 2; ++i) {
-                shapes[i] = std::shared_ptr<GeoShape>(
-                        GeoShape::from_encoded(strs[i]->data, strs[i]->size));
-                if (shapes[i] == nullptr) {
-                    res->insert_default();
-                    break;
-                }
-            }
-
-            if (i == 2) {
-                auto contains_value = shapes[0]->contains(shapes[1].get());
-                res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
-            }
+        if (left_const) {
+            const_vector(left_column, right_column, res, size);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size);
+        } else {
+            vector_vector(left_column, right_column, res, size);
         }
         block.replace_by_position(result, std::move(res));
         return Status::OK();
     }
 
+    static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
+                        std::vector<std::shared_ptr<GeoShape>>& shapes, int& i,
+                        MutableColumnPtr& res) {
+        StringRef* strs[2] = {&lhs_value, &rhs_value};
+        for (i = 0; i < 2; ++i) {
+            shapes[i] =
+                    std::shared_ptr<GeoShape>(GeoShape::from_encoded(strs[i]->data, strs[i]->size));
+            if (shapes[i] == nullptr) {
+                res->insert_default();
+                break;
+            }
+        }
+
+        if (i == 2) {
+            auto contains_value = shapes[0]->contains(shapes[1].get());
+            res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
+        }
+    }
+
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, const size_t size) {
+        int i;
+        auto lhs_value = left_column->get_data_at(0);
+        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+        for (int row = 0; row < size; ++row) {
+            auto rhs_value = right_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
+        }
+    }
+
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                             MutableColumnPtr& res, const size_t size) {
+        int i;
+        auto rhs_value = right_column->get_data_at(0);
+        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+        for (int row = 0; row < size; ++row) {
+            auto lhs_value = left_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
+                              MutableColumnPtr& res, const size_t size) {
+        int i;
+        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+        for (int row = 0; row < size; ++row) {
+            auto lhs_value = left_column->get_data_at(row);
+            auto rhs_value = right_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
+        }
+    }
+
     static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
         return Status::OK();
     }
@@ -638,7 +758,7 @@ struct StAsBinary {
         auto return_type = block.get_data_type(result);
         MutableColumnPtr res = return_type->create_column();
 
-        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto col = block.get_by_position(arguments[0]).column;
         const auto size = col->size();
 
         std::unique_ptr<GeoShape> shape;
diff --git a/be/test/vec/function/function_geo_test.cpp b/be/test/vec/function/function_geo_test.cpp
index c4b699e881..0860eae773 100644
--- a/be/test/vec/function/function_geo_test.cpp
+++ b/be/test/vec/function/function_geo_test.cpp
@@ -40,21 +40,39 @@ using namespace ut_type;
 
 TEST(VGeoFunctionsTest, function_geo_st_point_test) {
     std::string func_name = "st_point";
+
+    GeoPoint point;
+    auto cur_res = point.from_coord(24.7, 56.7);
+    EXPECT_TRUE(cur_res == GEO_PARSE_OK);
+    std::string buf;
+    point.encode_to(&buf);
+
+    DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
+                        {{Null(), (double)5}, Null()},
+                        {{(double)5, Null()}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::Float64, TypeIndex::Float64};
 
-        GeoPoint point;
-        auto cur_res = point.from_coord(24.7, 56.7);
-        EXPECT_TRUE(cur_res == GEO_PARSE_OK);
-        std::string buf;
-        point.encode_to(&buf);
-
-        DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
-                            {{Null(), (double)5}, Null()},
-                            {{(double)5, Null()}, Null()}};
-
         static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set));
     }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::Float64}, TypeIndex::Float64};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeString, true>(func_name, input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {TypeIndex::Float64, Consted {TypeIndex::Float64}};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeString, true>(func_name, input_types, const_dataset));
+        }
+    }
 }
 
 TEST(VGeoFunctionsTest, function_geo_st_as_text) {
@@ -199,62 +217,97 @@ TEST(VGeoFunctionsTest, function_geo_st_angle) {
 
 TEST(VGeoFunctionsTest, function_geo_st_azimuth) {
     std::string func_name = "st_azimuth";
+    GeoPoint point1;
+    auto cur_res1 = point1.from_coord(0, 0);
+    EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
+    GeoPoint point2;
+    auto cur_res2 = point2.from_coord(1, 0);
+    EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
+
+    std::string buf1;
+    point1.encode_to(&buf1);
+    std::string buf2;
+    point2.encode_to(&buf2);
+
+    DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
+                        {{buf1, Null()}, Null()},
+                        {{Null(), buf2}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
 
-        GeoPoint point1;
-        auto cur_res1 = point1.from_coord(0, 0);
-        EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
-        GeoPoint point2;
-        auto cur_res2 = point2.from_coord(1, 0);
-        EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
-
-        std::string buf1;
-        point1.encode_to(&buf1);
-        std::string buf2;
-        point2.encode_to(&buf2);
-
-        DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
-                            {{buf1, Null()}, Null()},
-                            {{Null(), buf2}, Null()}};
-
         static_cast<void>(check_function<DataTypeFloat64, true>(func_name, input_types, data_set));
     }
+    {
+        InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset));
+        }
+    }
 }
 
 TEST(VGeoFunctionsTest, function_geo_st_contains) {
     std::string func_name = "st_contains";
+
+    std::string buf1;
+    std::string buf2;
+    std::string buf3;
+    GeoParseStatus status;
+
+    std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))");
+    std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), shape1.size(), &status));
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    EXPECT_TRUE(shape != nullptr);
+    shape->encode_to(&buf1);
+
+    GeoPoint point1;
+    status = point1.from_coord(5, 5);
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    point1.encode_to(&buf2);
+
+    GeoPoint point2;
+    status = point2.from_coord(50, 50);
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    point2.encode_to(&buf3);
+
+    DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
+                        {{buf1, buf3}, (uint8_t)0},
+                        {{buf1, Null()}, Null()},
+                        {{Null(), buf3}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
 
-        std::string buf1;
-        std::string buf2;
-        std::string buf3;
-        GeoParseStatus status;
-
-        std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))");
-        std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), shape1.size(), &status));
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        EXPECT_TRUE(shape != nullptr);
-        shape->encode_to(&buf1);
-
-        GeoPoint point1;
-        status = point1.from_coord(5, 5);
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        point1.encode_to(&buf2);
-
-        GeoPoint point2;
-        status = point2.from_coord(50, 50);
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        point2.encode_to(&buf3);
-
-        DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
-                            {{buf1, buf3}, (uint8_t)0},
-                            {{buf1, Null()}, Null()},
-                            {{Null(), buf3}, Null()}};
-
         static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, data_set));
     }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};
+
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset));
+        }
+    }
 }
 
 TEST(VGeoFunctionsTest, function_geo_st_circle) {